[llvm] r356618 - [X86] Call lowerShuffleAsBitMask for 512-bit vectors in lowerShuffleAsBlend.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 20 14:30:20 PDT 2019


Author: ctopper
Date: Wed Mar 20 14:30:20 2019
New Revision: 356618

URL: http://llvm.org/viewvc/llvm-project?rev=356618&view=rev
Log:
[X86] Call lowerShuffleAsBitMask for 512-bit vectors in lowerShuffleAsBlend.

This patch enables the use of lowerShuffleAsBitMask for 512-bit blends before
falling back to move immedate, GPR to k-register, and masked op.

I had to make some changes to support v8i64 when i64 is not a legal type. And to
support floating point types.

This trades a load for the move immediate and GPR move which is higher latency.
But its probably better for register pressure not having to hop through other
register classes. The load+and should play better with LICM and
rematerialization I think.

Differential Revision: https://reviews.llvm.org/D59479

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=356618&r1=356617&r2=356618&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Mar 20 14:30:20 2019
@@ -10364,11 +10364,30 @@ static SDValue lowerShuffleWithPACK(cons
 /// one of the inputs being zeroable.
 static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
                                      SDValue V2, ArrayRef<int> Mask,
-                                     const APInt &Zeroable, SelectionDAG &DAG) {
-  assert(!VT.isFloatingPoint() && "Floating point types are not supported");
+                                     const APInt &Zeroable,
+                                     const X86Subtarget &Subtarget,
+                                     SelectionDAG &DAG) {
+  MVT MaskVT = VT;
   MVT EltVT = VT.getVectorElementType();
-  SDValue Zero = DAG.getConstant(0, DL, EltVT);
-  SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
+  SDValue Zero, AllOnes;
+  // Use f64 if i64 isn't legal.
+  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
+    EltVT = MVT::f64;
+    MaskVT = MVT::getVectorVT(EltVT, Mask.size());
+  }
+
+  MVT LogicVT = VT;
+  if (EltVT == MVT::f32 || EltVT == MVT::f64) {
+    Zero = DAG.getConstantFP(0.0, DL, MVT::f64);
+    AllOnes = DAG.getConstantFP(APInt::getAllOnesValue(64).bitsToDouble(), DL,
+                                EltVT);
+    LogicVT = MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32,
+                               Mask.size());
+  } else {
+    Zero = DAG.getConstant(0, DL, EltVT);
+    AllOnes = DAG.getAllOnesConstant(DL, EltVT);
+  }
+
   SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
   SDValue V;
   for (int i = 0, Size = Mask.size(); i < Size; ++i) {
@@ -10386,8 +10405,11 @@ static SDValue lowerShuffleAsBitMask(con
   if (!V)
     return SDValue(); // No non-zeroable elements!
 
-  SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps);
-  return DAG.getNode(ISD::AND, DL, VT, V, VMask);
+  SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
+  VMask = DAG.getBitcast(LogicVT, VMask);
+  V = DAG.getBitcast(LogicVT, V);
+  SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
+  return DAG.getBitcast(VT, And);
 }
 
 /// Try to emit a blend instruction for a shuffle using bit math.
@@ -10552,7 +10574,7 @@ static SDValue lowerShuffleAsBlend(const
 
     // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
     if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
-                                               DAG))
+                                               Subtarget, DAG))
       return Masked;
 
     if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
@@ -10610,6 +10632,16 @@ static SDValue lowerShuffleAsBlend(const
   case MVT::v16i32:
   case MVT::v32i16:
   case MVT::v64i8: {
+    // Attempt to lower to a bitmask if we can. Only if not optimizing for size.
+    bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+    if (!OptForSize) {
+      if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+                                                 Subtarget, DAG))
+        return Masked;
+    }
+
+    // Otherwise load an immediate into a GPR, cast to k-register, and use a
+    // masked move.
     MVT IntegerType =
         MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
     SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
@@ -12766,7 +12798,7 @@ static SDValue lowerV4I32Shuffle(const S
       return Blend;
 
   if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
-                                             Zeroable, DAG))
+                                             Zeroable, Subtarget, DAG))
     return Masked;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -13467,7 +13499,7 @@ static SDValue lowerV8I16Shuffle(const S
       return Blend;
 
   if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
-                                             Zeroable, DAG))
+                                             Zeroable, Subtarget, DAG))
     return Masked;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -13735,7 +13767,7 @@ static SDValue lowerV16I8Shuffle(const S
   }
 
   if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
-                                             Zeroable, DAG))
+                                             Zeroable, Subtarget, DAG))
     return Masked;
 
   // Use dedicated unpack instructions for masks that match their pattern.
@@ -15571,7 +15603,7 @@ static SDValue lower256BitShuffle(const
       // No floating point type available, if we can't use the bit operations
       // for masking/blending then decompose into 128-bit vectors.
       if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
-                                            DAG))
+                                            Subtarget, DAG))
         return V;
       if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
         return V;

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=356618&r1=356617&r2=356618&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Mar 20 14:30:20 2019
@@ -1860,25 +1860,56 @@ define <32 x i16> @test_build_vec_v32i1(
 ;
 ; SKX-LABEL: test_build_vec_v32i1:
 ; SKX:       ## %bb.0:
+; SKX-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: test_build_vec_v32i1:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: test_build_vec_v32i1:
+; AVX512DQ:       ## %bb.0:
+; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
+; AVX512DQ-NEXT:    retq
+;
+; X86-LABEL: test_build_vec_v32i1:
+; X86:       ## %bb.0:
+; X86-NEXT:    vandps LCPI40_0, %zmm0, %zmm0
+; X86-NEXT:    retl
+  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
+  ret <32 x i16> %ret
+}
+
+define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
+; KNL-LABEL: test_build_vec_v32i1_optsize:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_build_vec_v32i1_optsize:
+; SKX:       ## %bb.0:
 ; SKX-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
 ; SKX-NEXT:    kmovd %eax, %k1
 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq
 ;
-; AVX512BW-LABEL: test_build_vec_v32i1:
+; AVX512BW-LABEL: test_build_vec_v32i1_optsize:
 ; AVX512BW:       ## %bb.0:
 ; AVX512BW-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
 ; AVX512BW-NEXT:    kmovd %eax, %k1
 ; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512DQ-LABEL: test_build_vec_v32i1:
+; AVX512DQ-LABEL: test_build_vec_v32i1_optsize:
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512DQ-NEXT:    retq
 ;
-; X86-LABEL: test_build_vec_v32i1:
+; X86-LABEL: test_build_vec_v32i1_optsize:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
 ; X86-NEXT:    kmovd %eax, %k1
@@ -1928,12 +1959,12 @@ define void @ktest_1(<8 x double> %in, d
 ; KNL-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testb %al, %al
-; KNL-NEXT:    je LBB42_2
+; KNL-NEXT:    je LBB43_2
 ; KNL-NEXT:  ## %bb.1: ## %L1
 ; KNL-NEXT:    vmovapd %zmm0, (%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
-; KNL-NEXT:  LBB42_2: ## %L2
+; KNL-NEXT:  LBB43_2: ## %L2
 ; KNL-NEXT:    vmovapd %zmm0, 8(%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
@@ -1945,12 +1976,12 @@ define void @ktest_1(<8 x double> %in, d
 ; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
 ; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
 ; SKX-NEXT:    kortestb %k0, %k0
-; SKX-NEXT:    je LBB42_2
+; SKX-NEXT:    je LBB43_2
 ; SKX-NEXT:  ## %bb.1: ## %L1
 ; SKX-NEXT:    vmovapd %zmm0, (%rdi)
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-; SKX-NEXT:  LBB42_2: ## %L2
+; SKX-NEXT:  LBB43_2: ## %L2
 ; SKX-NEXT:    vmovapd %zmm0, 8(%rdi)
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -1963,12 +1994,12 @@ define void @ktest_1(<8 x double> %in, d
 ; AVX512BW-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
-; AVX512BW-NEXT:    je LBB42_2
+; AVX512BW-NEXT:    je LBB43_2
 ; AVX512BW-NEXT:  ## %bb.1: ## %L1
 ; AVX512BW-NEXT:    vmovapd %zmm0, (%rdi)
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
-; AVX512BW-NEXT:  LBB42_2: ## %L2
+; AVX512BW-NEXT:  LBB43_2: ## %L2
 ; AVX512BW-NEXT:    vmovapd %zmm0, 8(%rdi)
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
@@ -1980,12 +2011,12 @@ define void @ktest_1(<8 x double> %in, d
 ; AVX512DQ-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
 ; AVX512DQ-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
 ; AVX512DQ-NEXT:    kortestb %k0, %k0
-; AVX512DQ-NEXT:    je LBB42_2
+; AVX512DQ-NEXT:    je LBB43_2
 ; AVX512DQ-NEXT:  ## %bb.1: ## %L1
 ; AVX512DQ-NEXT:    vmovapd %zmm0, (%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
-; AVX512DQ-NEXT:  LBB42_2: ## %L2
+; AVX512DQ-NEXT:  LBB43_2: ## %L2
 ; AVX512DQ-NEXT:    vmovapd %zmm0, 8(%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
@@ -1998,12 +2029,12 @@ define void @ktest_1(<8 x double> %in, d
 ; X86-NEXT:    vmovupd 8(%eax), %zmm1 {%k1} {z}
 ; X86-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
 ; X86-NEXT:    kortestb %k0, %k0
-; X86-NEXT:    je LBB42_2
+; X86-NEXT:    je LBB43_2
 ; X86-NEXT:  ## %bb.1: ## %L1
 ; X86-NEXT:    vmovapd %zmm0, (%eax)
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
-; X86-NEXT:  LBB42_2: ## %L2
+; X86-NEXT:  LBB43_2: ## %L2
 ; X86-NEXT:    vmovapd %zmm0, 8(%eax)
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
@@ -2052,13 +2083,13 @@ define void @ktest_2(<32 x float> %in, f
 ; KNL-NEXT:    kmovw %k0, %ecx
 ; KNL-NEXT:    shll $16, %ecx
 ; KNL-NEXT:    orl %eax, %ecx
-; KNL-NEXT:    je LBB43_2
+; KNL-NEXT:    je LBB44_2
 ; KNL-NEXT:  ## %bb.1: ## %L1
 ; KNL-NEXT:    vmovaps %zmm0, (%rdi)
 ; KNL-NEXT:    vmovaps %zmm1, 64(%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
-; KNL-NEXT:  LBB43_2: ## %L2
+; KNL-NEXT:  LBB44_2: ## %L2
 ; KNL-NEXT:    vmovaps %zmm0, 4(%rdi)
 ; KNL-NEXT:    vmovaps %zmm1, 68(%rdi)
 ; KNL-NEXT:    vzeroupper
@@ -2077,13 +2108,13 @@ define void @ktest_2(<32 x float> %in, f
 ; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
 ; SKX-NEXT:    kunpckwd %k1, %k2, %k1
 ; SKX-NEXT:    kortestd %k1, %k0
-; SKX-NEXT:    je LBB43_2
+; SKX-NEXT:    je LBB44_2
 ; SKX-NEXT:  ## %bb.1: ## %L1
 ; SKX-NEXT:    vmovaps %zmm0, (%rdi)
 ; SKX-NEXT:    vmovaps %zmm1, 64(%rdi)
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-; SKX-NEXT:  LBB43_2: ## %L2
+; SKX-NEXT:  LBB44_2: ## %L2
 ; SKX-NEXT:    vmovaps %zmm0, 4(%rdi)
 ; SKX-NEXT:    vmovaps %zmm1, 68(%rdi)
 ; SKX-NEXT:    vzeroupper
@@ -2102,13 +2133,13 @@ define void @ktest_2(<32 x float> %in, f
 ; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm1, %k2
 ; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k1
 ; AVX512BW-NEXT:    kortestd %k1, %k0
-; AVX512BW-NEXT:    je LBB43_2
+; AVX512BW-NEXT:    je LBB44_2
 ; AVX512BW-NEXT:  ## %bb.1: ## %L1
 ; AVX512BW-NEXT:    vmovaps %zmm0, (%rdi)
 ; AVX512BW-NEXT:    vmovaps %zmm1, 64(%rdi)
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
-; AVX512BW-NEXT:  LBB43_2: ## %L2
+; AVX512BW-NEXT:  LBB44_2: ## %L2
 ; AVX512BW-NEXT:    vmovaps %zmm0, 4(%rdi)
 ; AVX512BW-NEXT:    vmovaps %zmm1, 68(%rdi)
 ; AVX512BW-NEXT:    vzeroupper
@@ -2130,13 +2161,13 @@ define void @ktest_2(<32 x float> %in, f
 ; AVX512DQ-NEXT:    kmovw %k0, %ecx
 ; AVX512DQ-NEXT:    shll $16, %ecx
 ; AVX512DQ-NEXT:    orl %eax, %ecx
-; AVX512DQ-NEXT:    je LBB43_2
+; AVX512DQ-NEXT:    je LBB44_2
 ; AVX512DQ-NEXT:  ## %bb.1: ## %L1
 ; AVX512DQ-NEXT:    vmovaps %zmm0, (%rdi)
 ; AVX512DQ-NEXT:    vmovaps %zmm1, 64(%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
-; AVX512DQ-NEXT:  LBB43_2: ## %L2
+; AVX512DQ-NEXT:  LBB44_2: ## %L2
 ; AVX512DQ-NEXT:    vmovaps %zmm0, 4(%rdi)
 ; AVX512DQ-NEXT:    vmovaps %zmm1, 68(%rdi)
 ; AVX512DQ-NEXT:    vzeroupper
@@ -2156,13 +2187,13 @@ define void @ktest_2(<32 x float> %in, f
 ; X86-NEXT:    vcmpltps %zmm2, %zmm1, %k2
 ; X86-NEXT:    kunpckwd %k1, %k2, %k1
 ; X86-NEXT:    kortestd %k1, %k0
-; X86-NEXT:    je LBB43_2
+; X86-NEXT:    je LBB44_2
 ; X86-NEXT:  ## %bb.1: ## %L1
 ; X86-NEXT:    vmovaps %zmm0, (%eax)
 ; X86-NEXT:    vmovaps %zmm1, 64(%eax)
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
-; X86-NEXT:  LBB43_2: ## %L2
+; X86-NEXT:  LBB44_2: ## %L2
 ; X86-NEXT:    vmovaps %zmm0, 4(%eax)
 ; X86-NEXT:    vmovaps %zmm1, 68(%eax)
 ; X86-NEXT:    vzeroupper
@@ -3175,12 +3206,12 @@ define void @ktest_signed(<16 x i32> %x,
 ; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testw %ax, %ax
-; KNL-NEXT:    jle LBB64_1
+; KNL-NEXT:    jle LBB65_1
 ; KNL-NEXT:  ## %bb.2: ## %bb.2
 ; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
-; KNL-NEXT:  LBB64_1: ## %bb.1
+; KNL-NEXT:  LBB65_1: ## %bb.1
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
 ; KNL-NEXT:    popq %rax
@@ -3194,12 +3225,12 @@ define void @ktest_signed(<16 x i32> %x,
 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    testw %ax, %ax
-; SKX-NEXT:    jle LBB64_1
+; SKX-NEXT:    jle LBB65_1
 ; SKX-NEXT:  ## %bb.2: ## %bb.2
 ; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-; SKX-NEXT:  LBB64_1: ## %bb.1
+; SKX-NEXT:  LBB65_1: ## %bb.1
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
 ; SKX-NEXT:    popq %rax
@@ -3213,12 +3244,12 @@ define void @ktest_signed(<16 x i32> %x,
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testw %ax, %ax
-; AVX512BW-NEXT:    jle LBB64_1
+; AVX512BW-NEXT:    jle LBB65_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %bb.2
 ; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
-; AVX512BW-NEXT:  LBB64_1: ## %bb.1
+; AVX512BW-NEXT:  LBB65_1: ## %bb.1
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
 ; AVX512BW-NEXT:    popq %rax
@@ -3232,12 +3263,12 @@ define void @ktest_signed(<16 x i32> %x,
 ; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512DQ-NEXT:    kmovw %k0, %eax
 ; AVX512DQ-NEXT:    testw %ax, %ax
-; AVX512DQ-NEXT:    jle LBB64_1
+; AVX512DQ-NEXT:    jle LBB65_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %bb.2
 ; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
-; AVX512DQ-NEXT:  LBB64_1: ## %bb.1
+; AVX512DQ-NEXT:  LBB65_1: ## %bb.1
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
 ; AVX512DQ-NEXT:    popq %rax
@@ -3251,12 +3282,12 @@ define void @ktest_signed(<16 x i32> %x,
 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; X86-NEXT:    kmovd %k0, %eax
 ; X86-NEXT:    testw %ax, %ax
-; X86-NEXT:    jle LBB64_1
+; X86-NEXT:    jle LBB65_1
 ; X86-NEXT:  ## %bb.2: ## %bb.2
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
-; X86-NEXT:  LBB64_1: ## %bb.1
+; X86-NEXT:  LBB65_1: ## %bb.1
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -3284,11 +3315,11 @@ define void @ktest_allones(<16 x i32> %x
 ; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; CHECK-NEXT:    kortestw %k0, %k0
-; CHECK-NEXT:    jb LBB65_2
+; CHECK-NEXT:    jb LBB66_2
 ; CHECK-NEXT:  ## %bb.1: ## %bb.1
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    callq _foo
-; CHECK-NEXT:  LBB65_2: ## %bb.2
+; CHECK-NEXT:  LBB66_2: ## %bb.2
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
@@ -3300,11 +3331,11 @@ define void @ktest_allones(<16 x i32> %x
 ; X86-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; X86-NEXT:    kortestw %k0, %k0
-; X86-NEXT:    jb LBB65_2
+; X86-NEXT:    jb LBB66_2
 ; X86-NEXT:  ## %bb.1: ## %bb.1
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
-; X86-NEXT:  LBB65_2: ## %bb.2
+; X86-NEXT:  LBB66_2: ## %bb.2
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
@@ -3492,12 +3523,12 @@ define void @ktest_3(<8 x i32> %w, <8 x
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testb %al, %al
-; KNL-NEXT:    je LBB71_1
+; KNL-NEXT:    je LBB72_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
 ; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
-; KNL-NEXT:  LBB71_1: ## %bar
+; KNL-NEXT:  LBB72_1: ## %bar
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
 ; KNL-NEXT:    popq %rax
@@ -3514,12 +3545,12 @@ define void @ktest_3(<8 x i32> %w, <8 x
 ; SKX-NEXT:    vptestnmd %ymm3, %ymm3, %k2
 ; SKX-NEXT:    korb %k2, %k1, %k1
 ; SKX-NEXT:    ktestb %k1, %k0
-; SKX-NEXT:    je LBB71_1
+; SKX-NEXT:    je LBB72_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-; SKX-NEXT:  LBB71_1: ## %bar
+; SKX-NEXT:  LBB72_1: ## %bar
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
 ; SKX-NEXT:    popq %rax
@@ -3542,12 +3573,12 @@ define void @ktest_3(<8 x i32> %w, <8 x
 ; AVX512BW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
-; AVX512BW-NEXT:    je LBB71_1
+; AVX512BW-NEXT:    je LBB72_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
 ; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
-; AVX512BW-NEXT:  LBB71_1: ## %bar
+; AVX512BW-NEXT:  LBB72_1: ## %bar
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
 ; AVX512BW-NEXT:    popq %rax
@@ -3568,12 +3599,12 @@ define void @ktest_3(<8 x i32> %w, <8 x
 ; AVX512DQ-NEXT:    korb %k1, %k0, %k0
 ; AVX512DQ-NEXT:    korb %k3, %k2, %k1
 ; AVX512DQ-NEXT:    ktestb %k1, %k0
-; AVX512DQ-NEXT:    je LBB71_1
+; AVX512DQ-NEXT:    je LBB72_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
 ; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
-; AVX512DQ-NEXT:  LBB71_1: ## %bar
+; AVX512DQ-NEXT:  LBB72_1: ## %bar
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
 ; AVX512DQ-NEXT:    popq %rax
@@ -3590,12 +3621,12 @@ define void @ktest_3(<8 x i32> %w, <8 x
 ; X86-NEXT:    vptestnmd %ymm3, %ymm3, %k2
 ; X86-NEXT:    korb %k2, %k1, %k1
 ; X86-NEXT:    ktestb %k1, %k0
-; X86-NEXT:    je LBB71_1
+; X86-NEXT:    je LBB72_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
-; X86-NEXT:  LBB71_1: ## %bar
+; X86-NEXT:  LBB72_1: ## %bar
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -3633,12 +3664,12 @@ define void @ktest_4(<8 x i64> %w, <8 x
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testb %al, %al
-; KNL-NEXT:    je LBB72_1
+; KNL-NEXT:    je LBB73_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
 ; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
-; KNL-NEXT:  LBB72_1: ## %bar
+; KNL-NEXT:  LBB73_1: ## %bar
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
 ; KNL-NEXT:    popq %rax
@@ -3655,12 +3686,12 @@ define void @ktest_4(<8 x i64> %w, <8 x
 ; SKX-NEXT:    vptestnmq %zmm3, %zmm3, %k2
 ; SKX-NEXT:    korb %k2, %k1, %k1
 ; SKX-NEXT:    ktestb %k1, %k0
-; SKX-NEXT:    je LBB72_1
+; SKX-NEXT:    je LBB73_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-; SKX-NEXT:  LBB72_1: ## %bar
+; SKX-NEXT:  LBB73_1: ## %bar
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
 ; SKX-NEXT:    popq %rax
@@ -3679,12 +3710,12 @@ define void @ktest_4(<8 x i64> %w, <8 x
 ; AVX512BW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
-; AVX512BW-NEXT:    je LBB72_1
+; AVX512BW-NEXT:    je LBB73_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
 ; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
-; AVX512BW-NEXT:  LBB72_1: ## %bar
+; AVX512BW-NEXT:  LBB73_1: ## %bar
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
 ; AVX512BW-NEXT:    popq %rax
@@ -3701,12 +3732,12 @@ define void @ktest_4(<8 x i64> %w, <8 x
 ; AVX512DQ-NEXT:    vptestnmq %zmm3, %zmm3, %k2
 ; AVX512DQ-NEXT:    korb %k2, %k1, %k1
 ; AVX512DQ-NEXT:    ktestb %k1, %k0
-; AVX512DQ-NEXT:    je LBB72_1
+; AVX512DQ-NEXT:    je LBB73_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
 ; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
-; AVX512DQ-NEXT:  LBB72_1: ## %bar
+; AVX512DQ-NEXT:  LBB73_1: ## %bar
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
 ; AVX512DQ-NEXT:    popq %rax
@@ -3723,12 +3754,12 @@ define void @ktest_4(<8 x i64> %w, <8 x
 ; X86-NEXT:    vptestnmq %zmm3, %zmm3, %k2
 ; X86-NEXT:    korb %k2, %k1, %k1
 ; X86-NEXT:    ktestb %k1, %k0
-; X86-NEXT:    je LBB72_1
+; X86-NEXT:    je LBB73_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
-; X86-NEXT:  LBB72_1: ## %bar
+; X86-NEXT:  LBB73_1: ## %bar
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -3765,12 +3796,12 @@ define void @ktest_5(<16 x i32> %w, <16
 ; KNL-NEXT:    korw %k2, %k1, %k1
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kortestw %k0, %k0
-; KNL-NEXT:    je LBB73_1
+; KNL-NEXT:    je LBB74_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
 ; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
-; KNL-NEXT:  LBB73_1: ## %bar
+; KNL-NEXT:  LBB74_1: ## %bar
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
 ; KNL-NEXT:    popq %rax
@@ -3787,12 +3818,12 @@ define void @ktest_5(<16 x i32> %w, <16
 ; SKX-NEXT:    vptestnmd %zmm3, %zmm3, %k2
 ; SKX-NEXT:    korw %k2, %k1, %k1
 ; SKX-NEXT:    ktestw %k1, %k0
-; SKX-NEXT:    je LBB73_1
+; SKX-NEXT:    je LBB74_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-; SKX-NEXT:  LBB73_1: ## %bar
+; SKX-NEXT:  LBB74_1: ## %bar
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
 ; SKX-NEXT:    popq %rax
@@ -3810,12 +3841,12 @@ define void @ktest_5(<16 x i32> %w, <16
 ; AVX512BW-NEXT:    korw %k2, %k1, %k1
 ; AVX512BW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512BW-NEXT:    kortestw %k0, %k0
-; AVX512BW-NEXT:    je LBB73_1
+; AVX512BW-NEXT:    je LBB74_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
 ; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
-; AVX512BW-NEXT:  LBB73_1: ## %bar
+; AVX512BW-NEXT:  LBB74_1: ## %bar
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
 ; AVX512BW-NEXT:    popq %rax
@@ -3832,12 +3863,12 @@ define void @ktest_5(<16 x i32> %w, <16
 ; AVX512DQ-NEXT:    vptestnmd %zmm3, %zmm3, %k2
 ; AVX512DQ-NEXT:    korw %k2, %k1, %k1
 ; AVX512DQ-NEXT:    ktestw %k1, %k0
-; AVX512DQ-NEXT:    je LBB73_1
+; AVX512DQ-NEXT:    je LBB74_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
 ; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
-; AVX512DQ-NEXT:  LBB73_1: ## %bar
+; AVX512DQ-NEXT:  LBB74_1: ## %bar
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
 ; AVX512DQ-NEXT:    popq %rax
@@ -3854,12 +3885,12 @@ define void @ktest_5(<16 x i32> %w, <16
 ; X86-NEXT:    vptestnmd %zmm3, %zmm3, %k2
 ; X86-NEXT:    korw %k2, %k1, %k1
 ; X86-NEXT:    ktestw %k1, %k0
-; X86-NEXT:    je LBB73_1
+; X86-NEXT:    je LBB74_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
-; X86-NEXT:  LBB73_1: ## %bar
+; X86-NEXT:  LBB74_1: ## %bar
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -3911,12 +3942,12 @@ define void @ktest_6(<32 x i16> %w, <32
 ; KNL-NEXT:    kmovw %k0, %ecx
 ; KNL-NEXT:    shll $16, %ecx
 ; KNL-NEXT:    orl %eax, %ecx
-; KNL-NEXT:    je LBB74_1
+; KNL-NEXT:    je LBB75_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
 ; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
-; KNL-NEXT:  LBB74_1: ## %bar
+; KNL-NEXT:  LBB75_1: ## %bar
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
 ; KNL-NEXT:    popq %rax
@@ -3933,12 +3964,12 @@ define void @ktest_6(<32 x i16> %w, <32
 ; SKX-NEXT:    vptestnmw %zmm3, %zmm3, %k2
 ; SKX-NEXT:    kord %k2, %k1, %k1
 ; SKX-NEXT:    ktestd %k1, %k0
-; SKX-NEXT:    je LBB74_1
+; SKX-NEXT:    je LBB75_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-; SKX-NEXT:  LBB74_1: ## %bar
+; SKX-NEXT:  LBB75_1: ## %bar
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
 ; SKX-NEXT:    popq %rax
@@ -3955,12 +3986,12 @@ define void @ktest_6(<32 x i16> %w, <32
 ; AVX512BW-NEXT:    vptestnmw %zmm3, %zmm3, %k2
 ; AVX512BW-NEXT:    kord %k2, %k1, %k1
 ; AVX512BW-NEXT:    ktestd %k1, %k0
-; AVX512BW-NEXT:    je LBB74_1
+; AVX512BW-NEXT:    je LBB75_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
 ; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
-; AVX512BW-NEXT:  LBB74_1: ## %bar
+; AVX512BW-NEXT:  LBB75_1: ## %bar
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
 ; AVX512BW-NEXT:    popq %rax
@@ -3993,12 +4024,12 @@ define void @ktest_6(<32 x i16> %w, <32
 ; AVX512DQ-NEXT:    kmovw %k0, %ecx
 ; AVX512DQ-NEXT:    shll $16, %ecx
 ; AVX512DQ-NEXT:    orl %eax, %ecx
-; AVX512DQ-NEXT:    je LBB74_1
+; AVX512DQ-NEXT:    je LBB75_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
 ; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
-; AVX512DQ-NEXT:  LBB74_1: ## %bar
+; AVX512DQ-NEXT:  LBB75_1: ## %bar
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
 ; AVX512DQ-NEXT:    popq %rax
@@ -4015,12 +4046,12 @@ define void @ktest_6(<32 x i16> %w, <32
 ; X86-NEXT:    vptestnmw %zmm3, %zmm3, %k2
 ; X86-NEXT:    kord %k2, %k1, %k1
 ; X86-NEXT:    ktestd %k1, %k0
-; X86-NEXT:    je LBB74_1
+; X86-NEXT:    je LBB75_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
-; X86-NEXT:  LBB74_1: ## %bar
+; X86-NEXT:  LBB75_1: ## %bar
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -4096,12 +4127,12 @@ define void @ktest_7(<64 x i8> %w, <64 x
 ; KNL-NEXT:    orl %eax, %edx
 ; KNL-NEXT:    shlq $32, %rdx
 ; KNL-NEXT:    orq %rcx, %rdx
-; KNL-NEXT:    je LBB75_1
+; KNL-NEXT:    je LBB76_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
 ; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
-; KNL-NEXT:  LBB75_1: ## %bar
+; KNL-NEXT:  LBB76_1: ## %bar
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
 ; KNL-NEXT:    popq %rax
@@ -4118,12 +4149,12 @@ define void @ktest_7(<64 x i8> %w, <64 x
 ; SKX-NEXT:    vptestnmb %zmm3, %zmm3, %k2
 ; SKX-NEXT:    korq %k2, %k1, %k1
 ; SKX-NEXT:    ktestq %k1, %k0
-; SKX-NEXT:    je LBB75_1
+; SKX-NEXT:    je LBB76_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
 ; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
-; SKX-NEXT:  LBB75_1: ## %bar
+; SKX-NEXT:  LBB76_1: ## %bar
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
 ; SKX-NEXT:    popq %rax
@@ -4140,12 +4171,12 @@ define void @ktest_7(<64 x i8> %w, <64 x
 ; AVX512BW-NEXT:    vptestnmb %zmm3, %zmm3, %k2
 ; AVX512BW-NEXT:    korq %k2, %k1, %k1
 ; AVX512BW-NEXT:    ktestq %k1, %k0
-; AVX512BW-NEXT:    je LBB75_1
+; AVX512BW-NEXT:    je LBB76_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
 ; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
-; AVX512BW-NEXT:  LBB75_1: ## %bar
+; AVX512BW-NEXT:  LBB76_1: ## %bar
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
 ; AVX512BW-NEXT:    popq %rax
@@ -4202,12 +4233,12 @@ define void @ktest_7(<64 x i8> %w, <64 x
 ; AVX512DQ-NEXT:    orl %eax, %edx
 ; AVX512DQ-NEXT:    shlq $32, %rdx
 ; AVX512DQ-NEXT:    orq %rcx, %rdx
-; AVX512DQ-NEXT:    je LBB75_1
+; AVX512DQ-NEXT:    je LBB76_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
 ; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
-; AVX512DQ-NEXT:  LBB75_1: ## %bar
+; AVX512DQ-NEXT:  LBB76_1: ## %bar
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
 ; AVX512DQ-NEXT:    popq %rax
@@ -4226,12 +4257,12 @@ define void @ktest_7(<64 x i8> %w, <64 x
 ; X86-NEXT:    kandq %k1, %k0, %k0
 ; X86-NEXT:    kshiftrq $32, %k0, %k1
 ; X86-NEXT:    kortestd %k1, %k0
-; X86-NEXT:    je LBB75_1
+; X86-NEXT:    je LBB76_1
 ; X86-NEXT:  ## %bb.2: ## %exit
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
-; X86-NEXT:  LBB75_1: ## %bar
+; X86-NEXT:  LBB76_1: ## %bar
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp

Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll?rev=356618&r1=356617&r2=356618&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll Wed Mar 20 14:30:20 2019
@@ -128,29 +128,17 @@ define <8 x double> @merge_8f64_f64_12zz
 }
 
 define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
-; AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
-; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    movb $32, %al
-; AVX512F-NEXT:    kmovw %eax, %k0
-; AVX512F-NEXT:    knotw %k0, %k1
-; AVX512F-NEXT:    vmovupd 8(%rdi), %zmm0 {%k1} {z}
-; AVX512F-NEXT:    retq
-;
-; AVX512BW-LABEL: merge_8f64_f64_1u3u5zu8:
-; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    movb $32, %al
-; AVX512BW-NEXT:    kmovd %eax, %k0
-; AVX512BW-NEXT:    knotw %k0, %k1
-; AVX512BW-NEXT:    vmovupd 8(%rdi), %zmm0 {%k1} {z}
-; AVX512BW-NEXT:    retq
+; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vmovdqu64 8(%rdi), %zmm0
+; ALL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT:    retq
 ;
 ; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
 ; X32-AVX512F:       # %bb.0:
 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    movb $32, %cl
-; X32-AVX512F-NEXT:    kmovw %ecx, %k0
-; X32-AVX512F-NEXT:    knotw %k0, %k1
-; X32-AVX512F-NEXT:    vmovupd 8(%eax), %zmm0 {%k1} {z}
+; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0
+; X32-AVX512F-NEXT:    vpandq {{\.LCPI.*}}, %zmm0, %zmm0
 ; X32-AVX512F-NEXT:    retl
   %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
   %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
@@ -219,29 +207,17 @@ define <8 x i64> @merge_8i64_i64_56zz9uz
 }
 
 define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
-; AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
-; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    movb $32, %al
-; AVX512F-NEXT:    kmovw %eax, %k0
-; AVX512F-NEXT:    knotw %k0, %k1
-; AVX512F-NEXT:    vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
-; AVX512F-NEXT:    retq
-;
-; AVX512BW-LABEL: merge_8i64_i64_1u3u5zu8:
-; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    movb $32, %al
-; AVX512BW-NEXT:    kmovd %eax, %k0
-; AVX512BW-NEXT:    knotw %k0, %k1
-; AVX512BW-NEXT:    vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
-; AVX512BW-NEXT:    retq
+; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vmovdqu64 8(%rdi), %zmm0
+; ALL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT:    retq
 ;
 ; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
 ; X32-AVX512F:       # %bb.0:
 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    movb $32, %cl
-; X32-AVX512F-NEXT:    kmovw %ecx, %k0
-; X32-AVX512F-NEXT:    knotw %k0, %k1
-; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0 {%k1} {z}
+; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0
+; X32-AVX512F-NEXT:    vpandq {{\.LCPI.*}}, %zmm0, %zmm0
 ; X32-AVX512F-NEXT:    retl
   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
   %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
@@ -450,29 +426,17 @@ define <16 x i32> @merge_16i32_i32_0uu3u
 }
 
 define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
-; AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
-; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    movw $8240, %ax # imm = 0x2030
-; AVX512F-NEXT:    kmovw %eax, %k0
-; AVX512F-NEXT:    knotw %k0, %k1
-; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
-; AVX512F-NEXT:    retq
-;
-; AVX512BW-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
-; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    movw $8240, %ax # imm = 0x2030
-; AVX512BW-NEXT:    kmovd %eax, %k0
-; AVX512BW-NEXT:    knotw %k0, %k1
-; AVX512BW-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
-; AVX512BW-NEXT:    retq
+; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vmovdqu64 (%rdi), %zmm0
+; ALL-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT:    retq
 ;
 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
 ; X32-AVX512F:       # %bb.0:
 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    movw $8240, %cx # imm = 0x2030
-; X32-AVX512F-NEXT:    kmovw %ecx, %k0
-; X32-AVX512F-NEXT:    knotw %k0, %k1
-; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0 {%k1} {z}
+; X32-AVX512F-NEXT:    vmovdqu64 (%eax), %zmm0
+; X32-AVX512F-NEXT:    vpandd {{\.LCPI.*}}, %zmm0, %zmm0
 ; X32-AVX512F-NEXT:    retl
   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll?rev=356618&r1=356617&r2=356618&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll Wed Mar 20 14:30:20 2019
@@ -203,9 +203,9 @@ define <32 x i16> @shuffle_v32i16_0zzzzz
 ;
 ; SKX-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    movl $1, %eax
-; SKX-NEXT:    kmovd %eax, %k1
-; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    movl $65535, %eax ## imm = 0xFFFF
+; SKX-NEXT:    vmovd %eax, %xmm1
+; SKX-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    retq
   %shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
   ret <32 x i16> %shuffle




More information about the llvm-commits mailing list