[llvm] r275045 - [AVX512] Use vpternlog with an immediate of 0xff to create 512-bit all-ones vectors.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 10 22:36:49 PDT 2016


Author: ctopper
Date: Mon Jul 11 00:36:48 2016
New Revision: 275045

URL: http://llvm.org/viewvc/llvm-project?rev=275045&view=rev
Log:
[AVX512] Use vpternlog with an immediate of 0xff to create 512-bit all-ones vectors.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll
    llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
    llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
    llvm/trunk/test/CodeGen/X86/avx512-ext.ll
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll
    llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
    llvm/trunk/test/CodeGen/X86/masked_memop.ll
    llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
    llvm/trunk/test/CodeGen/X86/vector-sext.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jul 11 00:36:48 2016
@@ -6554,11 +6554,11 @@ static SDValue materializeVectorConstant
   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
   // vpcmpeqd on 256-bit vectors.
   if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
-    if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))
+    if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
+        (VT == MVT::v8i32 && Subtarget.hasInt256()))
       return Op;
 
-    if (!VT.is512BitVector())
-      return getOnesVector(VT, Subtarget, DAG, DL);
+    return getOnesVector(VT, Subtarget, DAG, DL);
   }
 
   return SDValue();

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Jul 11 00:36:48 2016
@@ -416,6 +416,8 @@ let isReMaterializable = 1, isAsCheapAsA
     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                [(set VR512:$dst, (v16i32 immAllZerosV))]>;
+def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
+               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Jul 11 00:36:48 2016
@@ -5547,6 +5547,15 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
     return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
   case X86::AVX2_SETALLONES:
     return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
+  case X86::AVX512_512_SETALLONES: {
+    unsigned Reg = MIB->getOperand(0).getReg();
+    MIB->setDesc(get(X86::VPTERNLOGDZrri));
+    // VPTERNLOGD needs 3 register inputs and an immediate.
+    // 0xff will return 1s for any input.
+    MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
+       .addReg(Reg, RegState::Undef).addImm(0xff);
+    return true;
+  }
   case X86::TEST8ri_NOREX:
     MI.setDesc(get(X86::TEST8ri));
     return true;
@@ -6231,6 +6240,7 @@ MachineInstr *X86InstrInfo::foldMemoryOp
   else
     switch (LoadMI.getOpcode()) {
     case X86::AVX512_512_SET0:
+    case X86::AVX512_512_SETALLONES:
       Alignment = 64;
       break;
     case X86::AVX2_SETALLONES:
@@ -6281,6 +6291,7 @@ MachineInstr *X86InstrInfo::foldMemoryOp
   case X86::AVX512_128_SET0:
   case X86::AVX512_256_SET0:
   case X86::AVX512_512_SET0:
+  case X86::AVX512_512_SETALLONES:
   case X86::FsFLD0SD:
   case X86::FsFLD0SS: {
     // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
@@ -6312,7 +6323,7 @@ MachineInstr *X86InstrInfo::foldMemoryOp
       Ty = Type::getFloatTy(MF.getFunction()->getContext());
     else if (Opc == X86::FsFLD0SD)
       Ty = Type::getDoubleTy(MF.getFunction()->getContext());
-    else if (Opc == X86::AVX512_512_SET0)
+    else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16);
     else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
              Opc == X86::AVX512_256_SET0)
@@ -6320,7 +6331,8 @@ MachineInstr *X86InstrInfo::foldMemoryOp
     else
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
 
-    bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES);
+    bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
+                      Opc == X86::AVX512_512_SETALLONES);
     const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
                                     Constant::getNullValue(Ty);
     unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);

Modified: llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-build-vector.ll Mon Jul 11 00:36:48 2016
@@ -4,7 +4,8 @@
 define <16 x i32> @test2(<16 x i32> %x) {
 ; CHECK-LABEL: test2:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
    %res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
    ret <16 x i32>%res
@@ -15,8 +16,8 @@ define <16 x float> @test3(<4 x float> %
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vmovss %xmm0, %xmm2, %xmm0
-; CHECK-NEXT:    vmovss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[1,0],xmm0[0,1]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
 ; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1

Modified: llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-calling-conv.ll Mon Jul 11 00:36:48 2016
@@ -30,7 +30,8 @@ define <16 x i1> @test2(<16 x i1>%a, <16
 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
@@ -52,7 +53,8 @@ define <16 x i1> @test2(<16 x i1>%a, <16
 ; KNL_X32-NEXT:    vpslld $31, %zmm0, %zmm0
 ; KNL_X32-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; KNL_X32-NEXT:    vptestmd %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT:    vpbroadcastd LCPI1_0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL_X32-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL_X32-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL_X32-NEXT:    retl
   %c = and <16 x i1>%a, %b
@@ -68,7 +70,8 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x
 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
 ; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovqw %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
@@ -91,7 +94,7 @@ define <8 x i1> @test3(<8 x i1>%a, <8 x
 ; KNL_X32-NEXT:    vpsllvq %zmm2, %zmm0, %zmm0
 ; KNL_X32-NEXT:    vptestmq %zmm0, %zmm0, %k1
 ; KNL_X32-NEXT:    vptestmq %zmm1, %zmm1, %k1 {%k1}
-; KNL_X32-NEXT:    vpbroadcastd LCPI2_1, %zmm0
+; KNL_X32-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; KNL_X32-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL_X32-NEXT:    vpmovqw %zmm0, %xmm0
 ; KNL_X32-NEXT:    retl
@@ -183,7 +186,8 @@ define <16 x i32> @test6(<16 x i32>%a, <
 ; KNL-NEXT:  Ltmp1:
 ; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    callq _func16xi1
 ; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
@@ -212,7 +216,8 @@ define <16 x i32> @test6(<16 x i32>%a, <
 ; KNL_X32-NEXT:  Ltmp1:
 ; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
 ; KNL_X32-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
-; KNL_X32-NEXT:    vpbroadcastd LCPI5_0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL_X32-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL_X32-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL_X32-NEXT:    calll _func16xi1
 ; KNL_X32-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
@@ -286,7 +291,8 @@ define <8 x i1> @test7a(<8 x i32>%a, <8
 ; KNL-NEXT:    movb $85, %al
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1 {%k1}
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovqw %zmm0, %xmm0
 ; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    retq
@@ -322,7 +328,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8
 ; KNL_X32-NEXT:    movb $85, %al
 ; KNL_X32-NEXT:    kmovw %eax, %k1
 ; KNL_X32-NEXT:    vptestmq %zmm0, %zmm0, %k1 {%k1}
-; KNL_X32-NEXT:    vpbroadcastd LCPI7_1, %zmm0
+; KNL_X32-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; KNL_X32-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL_X32-NEXT:    vpmovqw %zmm0, %xmm0
 ; KNL_X32-NEXT:    addl $12, %esp

Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Mon Jul 11 00:36:48 2016
@@ -685,7 +685,8 @@ define <16 x float> @sitofp_16i1_float(<
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
 ; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vcvtdq2ps %zmm0, %zmm0
 ; KNL-NEXT:    retq
 ;
@@ -748,7 +749,8 @@ define <8 x double> @sitofp_8i1_double(<
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
 ; KNL-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
 ; KNL-NEXT:    vcvtdq2pd %ymm0, %zmm0
 ; KNL-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Mon Jul 11 00:36:48 2016
@@ -1409,7 +1409,8 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32
 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
 ; KNL-NEXT:    knotw %k0, %k1
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
 ; KNL-NEXT:    retq
 ;
@@ -1465,7 +1466,8 @@ define <16 x i32> @sext_16i1_16i32(<16 x
 ; KNL-LABEL: sext_16i1_16i32:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: sext_16i1_16i32:

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Mon Jul 11 00:36:48 2016
@@ -349,7 +349,8 @@ define <16 x i8> @test8(<16 x i32>%a, <1
 ; KNL-NEXT:  LBB17_1:
 ; KNL-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1
 ; KNL-NEXT:  LBB17_3:
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
@@ -386,7 +387,8 @@ define <16 x i1> @test9(<16 x i1>%a, <16
 ; KNL-NEXT:  LBB18_3:
 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
@@ -475,7 +477,8 @@ define <16 x i1> @test15(i32 %x, i32 %y)
 ; KNL-NEXT:    movw $1, %cx
 ; KNL-NEXT:    cmovgw %ax, %cx
 ; KNL-NEXT:    kmovw %ecx, %k1
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
@@ -512,25 +515,25 @@ define <64 x i8> @test16(i64 %x) {
 ; KNL-NEXT:    movl %edi, (%rsp)
 ; KNL-NEXT:    shrq $32, %rdi
 ; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
-; KNL-NEXT:    movl {{.*}}(%rip), %eax
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; KNL-NEXT:    kmovw (%rsp), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
-; KNL-NEXT:    vpmovdb %zmm0, %xmm0
-; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
-; KNL-NEXT:    movl $1, %ecx
-; KNL-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
-; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
+; KNL-NEXT:    vpmovdb %zmm2, %xmm2
+; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm2
+; KNL-NEXT:    movl $1, %eax
+; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
+; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm2 {%k1} {z}
-; KNL-NEXT:    vpmovdb %zmm2, %xmm2
-; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpmovdb %zmm0, %xmm0
+; KNL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
+; KNL-NEXT:    vpsllw $7, %ymm2, %ymm0
 ; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
 ; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
@@ -570,30 +573,30 @@ define <64 x i8> @test17(i64 %x, i32 %y,
 ; KNL-NEXT:    movl %edi, (%rsp)
 ; KNL-NEXT:    shrq $32, %rdi
 ; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
-; KNL-NEXT:    movl {{.*}}(%rip), %eax
+; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; KNL-NEXT:    kmovw (%rsp), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
-; KNL-NEXT:    xorl %ecx, %ecx
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
+; KNL-NEXT:    vpmovdb %zmm2, %xmm2
+; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
+; KNL-NEXT:    xorl %eax, %eax
 ; KNL-NEXT:    cmpl %edx, %esi
-; KNL-NEXT:    setg %cl
-; KNL-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
-; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; KNL-NEXT:    setg %al
+; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
+; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
 ; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
-; KNL-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT:    vpmovdb %zmm1, %xmm1
+; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm2 {%k1} {z}
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
-; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT:    vpmovdb %zmm1, %xmm1
+; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; KNL-NEXT:    movq %rbp, %rsp
 ; KNL-NEXT:    popq %rbp
 ; KNL-NEXT:    retq
@@ -628,7 +631,8 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    kshiftlw $7, %k2, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k1
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovqw %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
@@ -1368,7 +1372,8 @@ define <8 x i64> @load_8i1(<8 x i1>* %a)
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    movzbl (%rdi), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: load_8i1:
@@ -1385,7 +1390,8 @@ define <16 x i32> @load_16i1(<16 x i1>*
 ; KNL-LABEL: load_16i1:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    kmovw (%rdi), %k1
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: load_16i1:
@@ -1403,7 +1409,8 @@ define <2 x i16> @load_2i1(<2 x i1>* %a)
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    movzbl (%rdi), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
 ; KNL-NEXT:    retq
 ;
@@ -1422,7 +1429,8 @@ define <4 x i16> @load_4i1(<4 x i1>* %a)
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    movzbl (%rdi), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
 ; KNL-NEXT:    retq
@@ -1441,11 +1449,11 @@ define <32 x i16> @load_32i1(<32 x i1>*
 ; KNL-LABEL: load_32i1:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    kmovw (%rdi), %k1
-; KNL-NEXT:    movl {{.*}}(%rip), %eax
-; KNL-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
 ; KNL-NEXT:    kmovw 2(%rdi), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
 ; KNL-NEXT:    vpmovdw %zmm1, %ymm1
 ; KNL-NEXT:    retq
 ;
@@ -1463,20 +1471,20 @@ define <64 x i8> @load_64i1(<64 x i1>* %
 ; KNL-LABEL: load_64i1:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    kmovw (%rdi), %k1
-; KNL-NEXT:    movl {{.*}}(%rip), %eax
-; KNL-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    kmovw 2(%rdi), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
+; KNL-NEXT:    vpmovdb %zmm2, %xmm2
+; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; KNL-NEXT:    kmovw 4(%rdi), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
-; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    kmovw 6(%rdi), %k1
-; KNL-NEXT:    vpbroadcastd %eax, %zmm2 {%k1} {z}
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
-; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT:    kmovw 6(%rdi), %k1
+; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT:    vpmovdb %zmm1, %xmm1
+; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: load_64i1:

Modified: llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vbroadcast.ll Mon Jul 11 00:36:48 2016
@@ -218,7 +218,8 @@ define <16 x i32> @test_vbroadcast() {
 ; ALL:       # BB#0: # %entry
 ; ALL-NEXT:    vpxord %zmm0, %zmm0, %zmm0
 ; ALL-NEXT:    vcmpunordps %zmm0, %zmm0, %k1
-; ALL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; ALL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; ALL-NEXT:    knotw %k1, %k1
 ; ALL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; ALL-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Mon Jul 11 00:36:48 2016
@@ -865,7 +865,8 @@ define <8 x i32>@test28(<8 x i64> %x, <8
 ; KNL-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
 ; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
 ; KNL-NEXT:    kxnorw %k1, %k0, %k1
-; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
 ; KNL-NEXT:    retq
 ;
@@ -889,7 +890,8 @@ define <16 x i8>@test29(<16 x i32> %x, <
 ; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
 ; KNL-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1
 ; KNL-NEXT:    kxorw %k1, %k0, %k1
-; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Mon Jul 11 00:36:48 2016
@@ -2473,7 +2473,8 @@ define <16 x i8> @test_mask_load_16xi8(<
 ; AVX512F-NEXT:  ## BB#31: ## %cond.load43
 ; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_32: ## %else44
-; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
 ; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
 ; AVX512F-NEXT:    vpblendvb %xmm1, %xmm0, %xmm0, %xmm0
 ; AVX512F-NEXT:    retq
@@ -5676,7 +5677,8 @@ define <8 x i16> @test_mask_load_8xi16(<
 ; AVX512F-NEXT:  ## BB#15: ## %cond.load19
 ; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB53_16: ## %else20
-; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm1 {%k1} {z}
 ; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm1, %xmm2
@@ -6116,7 +6118,8 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:    vpinsrw $7, 30(%rdi), %xmm1, %xmm1
 ; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:  LBB54_32: ## %else44
-; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
 ; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
 ; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
 ; AVX512F-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Mon Jul 11 00:36:48 2016
@@ -706,7 +706,8 @@ define <8 x i1> @test_cmp_v8f64(<8 x dou
 ; AVX512-LABEL: test_cmp_v8f64:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = fcmp ogt <8 x double> %a0, %a1
@@ -767,7 +768,8 @@ define <16 x i1> @test_cmp_v16f32(<16 x
 ; AVX512-LABEL: test_cmp_v16f32:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vcmpltps %zmm0, %zmm1, %k1
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = fcmp ogt <16 x float> %a0, %a1
@@ -890,7 +892,8 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64
 ; AVX512-LABEL: test_cmp_v8i64:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = icmp sgt <8 x i64> %a0, %a1
@@ -954,7 +957,8 @@ define <16 x i1> @test_cmp_v16i32(<16 x
 ; AVX512-LABEL: test_cmp_v16i32:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = icmp sgt <16 x i32> %a0, %a1

Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Mon Jul 11 00:36:48 2016
@@ -785,7 +785,8 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    movzbl (%rdi), %eax
 ; AVX512-NEXT:    kmovw %eax, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
 ; AVX512-NEXT:    retq
 ;
@@ -966,7 +967,8 @@ define <4 x i32> @load_sext_4i1_to_4i32(
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    movzbl (%rdi), %eax
 ; AVX512-NEXT:    kmovw %eax, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
 ; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
 ; AVX512-NEXT:    retq
@@ -1162,7 +1164,8 @@ define <4 x i64> @load_sext_4i1_to_4i64(
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    movzbl (%rdi), %eax
 ; AVX512-NEXT:    kmovw %eax, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; AVX512-NEXT:    retq
 ;
@@ -1455,7 +1458,8 @@ define <8 x i16> @load_sext_8i1_to_8i16(
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    movzbl (%rdi), %eax
 ; AVX512-NEXT:    kmovw %eax, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
 ; AVX512-NEXT:    retq
 ;
@@ -1848,7 +1852,8 @@ define <8 x i32> @load_sext_8i1_to_8i32(
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    movzbl (%rdi), %eax
 ; AVX512-NEXT:    kmovw %eax, %k1
-; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
@@ -2350,7 +2355,8 @@ define <16 x i8> @load_sext_16i1_to_16i8
 ; AVX512-LABEL: load_sext_16i1_to_16i8:
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    kmovw (%rdi), %k1
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512-NEXT:    retq
 ;
@@ -2887,7 +2893,8 @@ define <16 x i16> @load_sext_16i1_to_16i
 ; AVX512-LABEL: load_sext_16i1_to_16i16:
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    kmovw (%rdi), %k1
-; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
@@ -3731,13 +3738,13 @@ define <32 x i8> @load_sext_32i1_to_32i8
 ; AVX512-LABEL: load_sext_32i1_to_32i8:
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    kmovw (%rdi), %k1
-; AVX512-NEXT:    movl {{.*}}(%rip), %eax
-; AVX512-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
-; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512-NEXT:    kmovw 2(%rdi), %k1
-; AVX512-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
 ; AVX512-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    kmovw 2(%rdi), %k1
+; AVX512-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX512-NEXT:    retq
 ;
 ; X32-SSE41-LABEL: load_sext_32i1_to_32i8:

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Mon Jul 11 00:36:48 2016
@@ -358,9 +358,9 @@ define <64 x i8> @combine_pshufb_identit
 ; CHECK-LABEL: combine_pshufb_identity_mask:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    kmovq %rdi, %k1
-; CHECK-NEXT:    vmovdqu8 {{.*#+}} zmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; CHECK-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; CHECK-NEXT:    vmovdqu8 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; CHECK-NEXT:    vmovaps %zmm1, %zmm3
+; CHECK-NEXT:    vpternlogd $255, %zmm3, %zmm3, %zmm3
 ; CHECK-NEXT:    vpshufb %zmm2, %zmm0, %zmm3 {%k1}
 ; CHECK-NEXT:    vpshufb %zmm2, %zmm3, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=275045&r1=275044&r2=275045&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Mon Jul 11 00:36:48 2016
@@ -74,13 +74,13 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0
 ; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
-; AVX512F-NEXT:    movq {{.*}}(%rip), %rax
-; AVX512F-NEXT:    vpbroadcastq %rax, %zmm0 {%k1} {z}
-; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
-; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT:    vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0]
+; AVX512F-NEXT:    vpermq %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vpmovqw %zmm0, %xmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -105,14 +105,14 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
 ; AVX512F-NEXT:    vpcmpeqd %zmm3, %zmm1, %k2
-; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
-; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k2} {z}
-; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k1} {z}
-; AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
-; AVX512F-NEXT:    vpermt2d %zmm0, %zmm2, %zmm1
-; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
-; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k2} {z}
+; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
+; AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT:    vpermt2d %zmm1, %zmm3, %zmm2
+; AVX512F-NEXT:    vpslld $31, %zmm2, %zmm1
+; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -163,13 +163,13 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
 ; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    movq {{.*}}(%rip), %rax
-; AVX512F-NEXT:    vpbroadcastq %rax, %zmm0 {%k1} {z}
-; AVX512F-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
-; AVX512F-NEXT:    vpbroadcastq %xmm0, %zmm0
-; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT:    vpbroadcastq %rax, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vextracti32x4 $1, %zmm1, %xmm1
+; AVX512F-NEXT:    vpbroadcastq %xmm1, %zmm1
+; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vpmovqw %zmm0, %xmm0
 ; AVX512F-NEXT:    retq
 ;
@@ -192,7 +192,8 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %
 ; AVX512F-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
 ; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
@@ -224,7 +225,8 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a
 ; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5,0,1,0,1]
 ; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -252,7 +254,8 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a
 ; AVX512F-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
 ; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
@@ -284,7 +287,8 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %
 ; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
 ; AVX512F-NEXT:    vpxord %zmm2, %zmm2, %zmm2
 ; AVX512F-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
@@ -316,14 +320,14 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8
 ; AVX512F-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    movq {{.*}}(%rip), %rax
-; AVX512F-NEXT:    movb $51, %cl
-; AVX512F-NEXT:    kmovw %ecx, %k2
-; AVX512F-NEXT:    vpbroadcastq %rax, %zmm0 {%k2} {z}
-; AVX512F-NEXT:    vpbroadcastq %rax, %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    movb $51, %al
+; AVX512F-NEXT:    kmovw %eax, %k2
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k2} {z}
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
-; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
-; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT:    vpsllq $63, %zmm1, %zmm0
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
@@ -355,11 +359,11 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_
 ; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm2
-; AVX512F-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; AVX512F-NEXT:    vpsllq $63, %zmm2, %zmm0
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [9,1,2,3,4,5,6,7]
+; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
@@ -371,7 +375,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_
 ; VL_BW_DQ-NEXT:    vpmovw2m %xmm0, %k0
 ; VL_BW_DQ-NEXT:    vpmovm2q %k0, %zmm0
 ; VL_BW_DQ-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
-; VL_BW_DQ-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm2
+; VL_BW_DQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
 ; VL_BW_DQ-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
 ; VL_BW_DQ-NEXT:    vpsllq $63, %zmm2, %zmm0
 ; VL_BW_DQ-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -388,7 +392,8 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0
 ; AVX512F-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
 ; AVX512F:       # BB#0:
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vpbroadcastd %xmm0, %zmm0
 ; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -426,8 +431,9 @@ define i64 @shuf64i1_zero(i64 %a) {
 ; AVX512F-NEXT:    andq $-32, %rsp
 ; AVX512F-NEXT:    subq $96, %rsp
 ; AVX512F-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
 ; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
-; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1




More information about the llvm-commits mailing list