[llvm] r350918 - [X86] Change vXi1 extract_vector_elt lowering to be legal if the index is 0. Add DAG combine to turn scalar_to_vector+extract_vector_elt into extract_subvector.

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Thu Jan 10 21:44:57 PST 2019


Author: ctopper
Date: Thu Jan 10 21:44:56 2019
New Revision: 350918

URL: http://llvm.org/viewvc/llvm-project?rev=350918&view=rev
Log:
[X86] Change vXi1 extract_vector_elt lowering to be legal if the index is 0. Add DAG combine to turn scalar_to_vector+extract_vector_elt into extract_subvector.

We were lowering the final extract_vector_elt step to a bitcast+truncate. Change it to use an extract_vector_elt of index 0 instead. Add isel patterns to do the equivalent of what the bitcast would have done, plus an isel pattern for an any_extend+extract to prevent some regressions.
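
For illustration, here is a small IR test in the spirit of the test_extractelement_v4i1 case updated below; the function name and exact IR are hypothetical and may not match the tree. It extracts a non-zero bit of a compare mask:

  define zeroext i8 @extract_bit3(<4 x i32> %a, <4 x i32> %b) {
    ; With AVX512VL the compare result lives in a k-register as a v4i1 mask.
    %cmp = icmp ugt <4 x i32> %a, %b
    ; Non-zero index: lowered to a kshift followed by an index-0 extract.
    %bit = extractelement <4 x i1> %cmp, i64 3
    %ext = zext i1 %bit to i8
    ret i8 %ext
  }

With this change the mask is widened only to the narrowest natively supported kshift type, the bit is shifted down (kshiftrb on DQI targets, kshiftrw otherwise), and the remaining index-0 extract is matched by the new patterns as a plain mask-to-GPR copy rather than a bitcast+truncate.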

Finally, add a DAG combine to turn a v1i1 scalar_to_vector+extract_vector_elt of index 0 into an extract_subvector.
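
As a rough sketch of where that combine fires (modeled loosely on the floor_mask_ss_mask8-style tests in vec_floor.ll below; the function name and exact IR here are hypothetical), the i1 extracted from a compare mask ends up re-wrapped into a v1i1 write-mask for the masked scalar operation:

  declare float @llvm.floor.f32(float)

  define <4 x float> @masked_floor_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w) {
    %cmp = fcmp oeq <4 x float> %x, %y
    ; Index-0 extract of a vXi1 mask ...
    %bit = extractelement <4 x i1> %cmp, i64 0
    %s   = extractelement <4 x float> %x, i64 0
    %f   = call float @llvm.floor.f32(float %s)
    %d   = extractelement <4 x float> %w, i64 0
    ; ... used as the mask of a scalar select that becomes a masked rndscale.
    %sel = select i1 %bit, float %f, float %d
    %res = insertelement <4 x float> %y, float %sel, i64 0
    ret <4 x float> %res
  }

Folding the scalar_to_vector of that index-0 extract into an extract_subvector lets the compare result feed the write-mask directly, which is why the redundant "kmovw %k0, %k1" copies disappear in the vec_floor.ll diff below.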

This fixes some of the regressions from r350800.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
    llvm/trunk/test/CodeGen/X86/pr33349.ll
    llvm/trunk/test/CodeGen/X86/vec_floor.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=350918&r1=350917&r2=350918&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 10 21:44:56 2019
@@ -16296,34 +16296,25 @@ static SDValue ExtractBitFromMaskVector(
   }
 
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  if (IdxVal == 0) // the operation is legal
+    return Op;
 
-  // If the kshift instructions of the correct width aren't natively supported
-  // then we need to promote the vector to the native size to get the correct
-  // zeroing behavior.
-  if (VecVT.getVectorNumElements() < 16) {
-    VecVT = MVT::v16i1;
-    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
-                      DAG.getUNDEF(VecVT), Vec,
+  // Extend to natively supported kshift.
+  unsigned NumElems = VecVT.getVectorNumElements();
+  MVT WideVecVT = VecVT;
+  if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
+    WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
+                      DAG.getUNDEF(WideVecVT), Vec,
                       DAG.getIntPtrConstant(0, dl));
   }
 
-  // Extracts from element 0 are always allowed.
-  if (IdxVal != 0) {
-    // Use kshiftr instruction to move to the lower element.
-    Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
-                      DAG.getConstant(IdxVal, dl, MVT::i8));
-  }
-
-  // Shrink to v16i1 since that's always legal.
-  if (VecVT.getVectorNumElements() > 16) {
-    VecVT = MVT::v16i1;
-    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
-                      DAG.getIntPtrConstant(0, dl));
-  }
+  // Use kshiftr instruction to move to the lower element.
+  Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
+                    DAG.getConstant(IdxVal, dl, MVT::i8));
 
-  // Convert to a bitcast+aext/trunc.
-  MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
-  return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+                     DAG.getIntPtrConstant(0, dl));
 }
 
 SDValue
@@ -41413,6 +41404,15 @@ static SDValue combineScalarToVector(SDN
         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
                            Src.getOperand(0));
 
+  // Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
+  if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
+      Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+    if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
+      if (C->isNullValue())
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
+                           Src.getOperand(0), Src.getOperand(1));
+
   return SDValue();
 }
 

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=350918&r1=350917&r2=350918&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Jan 10 21:44:56 2019
@@ -2889,6 +2889,11 @@ let Predicates = [HasAVX512] in {
             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
 }
 
+def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
+                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
+                                              SDTCVecEltisVT<1, i1>,
+                                              SDTCisPtrTy<2>]>>;
+
 let Predicates = [HasAVX512] in {
   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
@@ -2896,6 +2901,12 @@ let Predicates = [HasAVX512] in {
 
     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
+
+    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
+              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
+
+    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
+              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
   }
 
   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;

Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=350918&r1=350917&r2=350918&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Thu Jan 10 21:44:56 2019
@@ -270,7 +270,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    movq %rdi, %rax
 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
-; SKX-NEXT:    kshiftrw $4, %k0, %k0
+; SKX-NEXT:    kshiftrb $4, %k0, %k0
 ; SKX-NEXT:    kmovd %k0, %ecx
 ; SKX-NEXT:    testb $1, %cl
 ; SKX-NEXT:    cmoveq %rsi, %rax
@@ -976,7 +976,7 @@ define zeroext i8 @test_extractelement_v
 ; SKX-LABEL: test_extractelement_v4i1:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
-; SKX-NEXT:    kshiftrw $3, %k0, %k0
+; SKX-NEXT:    kshiftrb $3, %k0, %k0
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=350918&r1=350917&r2=350918&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Jan 10 21:44:56 2019
@@ -1281,19 +1281,15 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; KNL-NEXT:    kmovw %edi, %k0
 ; KNL-NEXT:    kmovw %esi, %k1
 ; KNL-NEXT:    kshiftrw $8, %k1, %k2
-; KNL-NEXT:    kmovw %k2, %eax
 ; KNL-NEXT:    kshiftrw $9, %k1, %k1
-; KNL-NEXT:    kmovw %k1, %ecx
-; KNL-NEXT:    kshiftrw $6, %k0, %k1
-; KNL-NEXT:    kmovw %ecx, %k2
-; KNL-NEXT:    kxorw %k2, %k1, %k1
+; KNL-NEXT:    kshiftrw $6, %k0, %k3
+; KNL-NEXT:    kxorw %k1, %k3, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $9, %k1, %k1
 ; KNL-NEXT:    kxorw %k1, %k0, %k0
 ; KNL-NEXT:    kshiftlw $9, %k0, %k0
 ; KNL-NEXT:    kshiftrw $9, %k0, %k0
-; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    kshiftlw $7, %k1, %k1
+; KNL-NEXT:    kshiftlw $7, %k2, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k1
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
@@ -1303,37 +1299,37 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ;
 ; SKX-LABEL: test18:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    kmovd %edi, %k1
-; SKX-NEXT:    kmovd %esi, %k2
-; SKX-NEXT:    kshiftrw $8, %k2, %k0
-; SKX-NEXT:    kshiftrw $9, %k2, %k2
-; SKX-NEXT:    kshiftrb $6, %k1, %k3
-; SKX-NEXT:    kxorb %k2, %k3, %k2
-; SKX-NEXT:    kshiftlb $7, %k2, %k2
-; SKX-NEXT:    kshiftrb $1, %k2, %k2
-; SKX-NEXT:    kxorb %k2, %k1, %k1
-; SKX-NEXT:    kshiftlb $1, %k1, %k1
+; SKX-NEXT:    kmovd %edi, %k0
+; SKX-NEXT:    kmovd %esi, %k1
+; SKX-NEXT:    kshiftrw $8, %k1, %k2
+; SKX-NEXT:    kshiftrw $9, %k1, %k1
+; SKX-NEXT:    kshiftrb $6, %k0, %k3
+; SKX-NEXT:    kxorb %k1, %k3, %k1
+; SKX-NEXT:    kshiftlb $7, %k1, %k1
 ; SKX-NEXT:    kshiftrb $1, %k1, %k1
-; SKX-NEXT:    kshiftlb $7, %k0, %k0
-; SKX-NEXT:    korb %k0, %k1, %k0
+; SKX-NEXT:    kxorb %k1, %k0, %k0
+; SKX-NEXT:    kshiftlb $1, %k0, %k0
+; SKX-NEXT:    kshiftrb $1, %k0, %k0
+; SKX-NEXT:    kshiftlb $7, %k2, %k1
+; SKX-NEXT:    korb %k1, %k0, %k0
 ; SKX-NEXT:    vpmovm2w %k0, %xmm0
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test18:
 ; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    kmovd %edi, %k1
-; AVX512BW-NEXT:    kmovd %esi, %k2
-; AVX512BW-NEXT:    kshiftrw $8, %k2, %k0
-; AVX512BW-NEXT:    kshiftrw $9, %k2, %k2
-; AVX512BW-NEXT:    kshiftrw $6, %k1, %k3
-; AVX512BW-NEXT:    kxorw %k2, %k3, %k2
-; AVX512BW-NEXT:    kshiftlw $15, %k2, %k2
-; AVX512BW-NEXT:    kshiftrw $9, %k2, %k2
-; AVX512BW-NEXT:    kxorw %k2, %k1, %k1
-; AVX512BW-NEXT:    kshiftlw $9, %k1, %k1
+; AVX512BW-NEXT:    kmovd %edi, %k0
+; AVX512BW-NEXT:    kmovd %esi, %k1
+; AVX512BW-NEXT:    kshiftrw $8, %k1, %k2
 ; AVX512BW-NEXT:    kshiftrw $9, %k1, %k1
-; AVX512BW-NEXT:    kshiftlw $7, %k0, %k0
-; AVX512BW-NEXT:    korw %k0, %k1, %k0
+; AVX512BW-NEXT:    kshiftrw $6, %k0, %k3
+; AVX512BW-NEXT:    kxorw %k1, %k3, %k1
+; AVX512BW-NEXT:    kshiftlw $15, %k1, %k1
+; AVX512BW-NEXT:    kshiftrw $9, %k1, %k1
+; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
+; AVX512BW-NEXT:    kshiftlw $9, %k0, %k0
+; AVX512BW-NEXT:    kshiftrw $9, %k0, %k0
+; AVX512BW-NEXT:    kshiftlw $7, %k2, %k1
+; AVX512BW-NEXT:    korw %k1, %k0, %k0
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512BW-NEXT:    vzeroupper
@@ -1344,19 +1340,15 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; AVX512DQ-NEXT:    kmovw %edi, %k0
 ; AVX512DQ-NEXT:    kmovw %esi, %k1
 ; AVX512DQ-NEXT:    kshiftrw $8, %k1, %k2
-; AVX512DQ-NEXT:    kmovw %k2, %eax
 ; AVX512DQ-NEXT:    kshiftrw $9, %k1, %k1
-; AVX512DQ-NEXT:    kmovw %k1, %ecx
-; AVX512DQ-NEXT:    kshiftrb $6, %k0, %k1
-; AVX512DQ-NEXT:    kmovw %ecx, %k2
-; AVX512DQ-NEXT:    kxorb %k2, %k1, %k1
+; AVX512DQ-NEXT:    kshiftrb $6, %k0, %k3
+; AVX512DQ-NEXT:    kxorb %k1, %k3, %k1
 ; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrb $1, %k1, %k1
 ; AVX512DQ-NEXT:    kxorb %k1, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftlb $1, %k0, %k0
 ; AVX512DQ-NEXT:    kshiftrb $1, %k0, %k0
-; AVX512DQ-NEXT:    kmovw %eax, %k1
-; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
+; AVX512DQ-NEXT:    kshiftlb $7, %k2, %k1
 ; AVX512DQ-NEXT:    korb %k1, %k0, %k0
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
@@ -1368,16 +1360,16 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ; X86:       ## %bb.0:
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    kshiftrw $9, %k1, %k2
-; X86-NEXT:    kshiftrw $8, %k1, %k1
-; X86-NEXT:    kshiftlb $7, %k1, %k1
+; X86-NEXT:    kshiftrw $8, %k1, %k2
+; X86-NEXT:    kshiftrw $9, %k1, %k1
 ; X86-NEXT:    kshiftrb $6, %k0, %k3
-; X86-NEXT:    kxorb %k2, %k3, %k2
-; X86-NEXT:    kshiftlb $7, %k2, %k2
-; X86-NEXT:    kshiftrb $1, %k2, %k2
-; X86-NEXT:    kxorb %k2, %k0, %k0
+; X86-NEXT:    kxorb %k1, %k3, %k1
+; X86-NEXT:    kshiftlb $7, %k1, %k1
+; X86-NEXT:    kshiftrb $1, %k1, %k1
+; X86-NEXT:    kxorb %k1, %k0, %k0
 ; X86-NEXT:    kshiftlb $1, %k0, %k0
 ; X86-NEXT:    kshiftrb $1, %k0, %k0
+; X86-NEXT:    kshiftlb $7, %k2, %k1
 ; X86-NEXT:    korb %k1, %k0, %k0
 ; X86-NEXT:    vpmovm2w %k0, %xmm0
 ; X86-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=350918&r1=350917&r2=350918&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Thu Jan 10 21:44:56 2019
@@ -7300,37 +7300,37 @@ define <64 x i8> @vmov_test17(i64 %x, i3
 define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
 ; GENERIC-LABEL: vmov_test18:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %esi, %k2 # sched: [1:0.33]
-; GENERIC-NEXT:    kshiftrw $8, %k2, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    kshiftrw $9, %k2, %k2 # sched: [1:1.00]
-; GENERIC-NEXT:    kshiftrb $6, %k1, %k3 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorb %k2, %k3, %k2 # sched: [1:0.33]
-; GENERIC-NEXT:    kshiftlb $7, %k2, %k2 # sched: [1:1.00]
-; GENERIC-NEXT:    kshiftrb $1, %k2, %k2 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorb %k2, %k1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kshiftlb $1, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kshiftrw $8, %k1, %k2 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrw $9, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrb $6, %k0, %k3 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorb %k1, %k3, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kshiftlb $7, %k1, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    kshiftrb $1, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    kshiftlb $7, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    korb %k0, %k1, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kshiftlb $1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrb $1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftlb $7, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    korb %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vmov_test18:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT:    kmovd %esi, %k2 # sched: [1:1.00]
-; SKX-NEXT:    kshiftrw $8, %k2, %k0 # sched: [3:1.00]
-; SKX-NEXT:    kshiftrw $9, %k2, %k2 # sched: [3:1.00]
-; SKX-NEXT:    kshiftrb $6, %k1, %k3 # sched: [3:1.00]
-; SKX-NEXT:    kxorb %k2, %k3, %k2 # sched: [1:1.00]
-; SKX-NEXT:    kshiftlb $7, %k2, %k2 # sched: [3:1.00]
-; SKX-NEXT:    kshiftrb $1, %k2, %k2 # sched: [3:1.00]
-; SKX-NEXT:    kxorb %k2, %k1, %k1 # sched: [1:1.00]
-; SKX-NEXT:    kshiftlb $1, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT:    kmovd %edi, %k0 # sched: [1:1.00]
+; SKX-NEXT:    kmovd %esi, %k1 # sched: [1:1.00]
+; SKX-NEXT:    kshiftrw $8, %k1, %k2 # sched: [3:1.00]
+; SKX-NEXT:    kshiftrw $9, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT:    kshiftrb $6, %k0, %k3 # sched: [3:1.00]
+; SKX-NEXT:    kxorb %k1, %k3, %k1 # sched: [1:1.00]
+; SKX-NEXT:    kshiftlb $7, %k1, %k1 # sched: [3:1.00]
 ; SKX-NEXT:    kshiftrb $1, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT:    kshiftlb $7, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    korb %k0, %k1, %k0 # sched: [1:1.00]
+; SKX-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; SKX-NEXT:    kshiftlb $1, %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    kshiftrb $1, %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    kshiftlb $7, %k2, %k1 # sched: [3:1.00]
+; SKX-NEXT:    korb %k1, %k0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = bitcast i8 %a to <8 x i1>

Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=350918&r1=350917&r2=350918&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Thu Jan 10 21:44:56 2019
@@ -1743,7 +1743,7 @@ define <3 x i32> @test30(<3 x i32*> %bas
 ; SKX-NEXT:    vmovq %xmm0, %rax
 ; SKX-NEXT:    vpinsrd $0, (%rax), %xmm3, %xmm3
 ; SKX-NEXT:  .LBB31_2: # %else
-; SKX-NEXT:    kshiftrw $1, %k0, %k1
+; SKX-NEXT:    kshiftrb $1, %k0, %k1
 ; SKX-NEXT:    kmovw %k1, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    je .LBB31_4
@@ -1751,7 +1751,7 @@ define <3 x i32> @test30(<3 x i32*> %bas
 ; SKX-NEXT:    vpextrq $1, %xmm0, %rax
 ; SKX-NEXT:    vpinsrd $1, (%rax), %xmm3, %xmm3
 ; SKX-NEXT:  .LBB31_4: # %else2
-; SKX-NEXT:    kshiftrw $2, %k0, %k0
+; SKX-NEXT:    kshiftrb $2, %k0, %k0
 ; SKX-NEXT:    kmovw %k0, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    je .LBB31_6
@@ -1781,7 +1781,7 @@ define <3 x i32> @test30(<3 x i32*> %bas
 ; SKX_32-NEXT:    vmovd %xmm1, %eax
 ; SKX_32-NEXT:    vpinsrd $0, (%eax), %xmm0, %xmm0
 ; SKX_32-NEXT:  .LBB31_2: # %else
-; SKX_32-NEXT:    kshiftrw $1, %k0, %k1
+; SKX_32-NEXT:    kshiftrb $1, %k0, %k1
 ; SKX_32-NEXT:    kmovw %k1, %eax
 ; SKX_32-NEXT:    testb $1, %al
 ; SKX_32-NEXT:    je .LBB31_4
@@ -1789,7 +1789,7 @@ define <3 x i32> @test30(<3 x i32*> %bas
 ; SKX_32-NEXT:    vpextrd $1, %xmm1, %eax
 ; SKX_32-NEXT:    vpinsrd $1, (%eax), %xmm0, %xmm0
 ; SKX_32-NEXT:  .LBB31_4: # %else2
-; SKX_32-NEXT:    kshiftrw $2, %k0, %k0
+; SKX_32-NEXT:    kshiftrb $2, %k0, %k0
 ; SKX_32-NEXT:    kmovw %k0, %eax
 ; SKX_32-NEXT:    testb $1, %al
 ; SKX_32-NEXT:    je .LBB31_6

Modified: llvm/trunk/test/CodeGen/X86/pr33349.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr33349.ll?rev=350918&r1=350917&r2=350918&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr33349.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr33349.ll Thu Jan 10 21:44:56 2019
@@ -12,32 +12,32 @@ target triple = "x86_64-unknown-linux-gn
 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k1
 ; KNL-NEXT:    kmovw %k1, %eax
+; KNL-NEXT:    kshiftrw $2, %k0, %k1
+; KNL-NEXT:    kshiftrw $1, %k1, %k2
+; KNL-NEXT:    kmovw %k1, %ecx
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fld1
 ; KNL-NEXT:    fldz
 ; KNL-NEXT:    fld %st(0)
 ; KNL-NEXT:    fcmovne %st(2), %st(0)
-; KNL-NEXT:    kshiftrw $2, %k0, %k1
-; KNL-NEXT:    kshiftrw $1, %k1, %k2
-; KNL-NEXT:    kmovw %k2, %eax
-; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    testb $1, %cl
 ; KNL-NEXT:    fld %st(1)
 ; KNL-NEXT:    fcmovne %st(3), %st(0)
-; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    kmovw %k2, %eax
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fld %st(2)
 ; KNL-NEXT:    fcmovne %st(4), %st(0)
-; KNL-NEXT:    kmovw %k1, %eax
+; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fxch %st(3)
 ; KNL-NEXT:    fcmovne %st(4), %st(0)
 ; KNL-NEXT:    fstp %st(4)
 ; KNL-NEXT:    fxch %st(3)
-; KNL-NEXT:    fstpt 20(%rdi)
-; KNL-NEXT:    fxch %st(1)
 ; KNL-NEXT:    fstpt (%rdi)
 ; KNL-NEXT:    fxch %st(1)
 ; KNL-NEXT:    fstpt 30(%rdi)
+; KNL-NEXT:    fxch %st(1)
+; KNL-NEXT:    fstpt 20(%rdi)
 ; KNL-NEXT:    fstpt 10(%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
@@ -46,20 +46,20 @@ target triple = "x86_64-unknown-linux-gn
 ; SKX:       # %bb.0: # %bb
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
 ; SKX-NEXT:    vpmovd2m %xmm0, %k0
+; SKX-NEXT:    kshiftrb $1, %k0, %k1
+; SKX-NEXT:    kmovd %k1, %eax
 ; SKX-NEXT:    kshiftrb $2, %k0, %k1
-; SKX-NEXT:    kshiftrw $1, %k1, %k2
-; SKX-NEXT:    kmovd %k2, %eax
+; SKX-NEXT:    kshiftrb $1, %k1, %k2
+; SKX-NEXT:    kmovd %k1, %ecx
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    fld1
 ; SKX-NEXT:    fldz
 ; SKX-NEXT:    fld %st(0)
 ; SKX-NEXT:    fcmovne %st(2), %st(0)
-; SKX-NEXT:    kmovd %k1, %eax
-; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    testb $1, %cl
 ; SKX-NEXT:    fld %st(1)
 ; SKX-NEXT:    fcmovne %st(3), %st(0)
-; SKX-NEXT:    kshiftrw $1, %k0, %k1
-; SKX-NEXT:    kmovd %k1, %eax
+; SKX-NEXT:    kmovd %k2, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    fld %st(2)
 ; SKX-NEXT:    fcmovne %st(4), %st(0)
@@ -71,10 +71,10 @@ target triple = "x86_64-unknown-linux-gn
 ; SKX-NEXT:    fxch %st(3)
 ; SKX-NEXT:    fstpt (%rdi)
 ; SKX-NEXT:    fxch %st(1)
-; SKX-NEXT:    fstpt 10(%rdi)
+; SKX-NEXT:    fstpt 30(%rdi)
 ; SKX-NEXT:    fxch %st(1)
 ; SKX-NEXT:    fstpt 20(%rdi)
-; SKX-NEXT:    fstpt 30(%rdi)
+; SKX-NEXT:    fstpt 10(%rdi)
 ; SKX-NEXT:    retq
  bb:
    %tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80             0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer

Modified: llvm/trunk/test/CodeGen/X86/vec_floor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_floor.ll?rev=350918&r1=350917&r2=350918&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_floor.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_floor.ll Thu Jan 10 21:44:56 2019
@@ -1694,8 +1694,7 @@ define <4 x float> @floor_mask_ss_mask8(
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
-; AVX512F-NEXT:    kmovw %k0, %k1
+; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
 ; AVX512F-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -1703,8 +1702,7 @@ define <4 x float> @floor_mask_ss_mask8(
 ;
 ; AVX512VL-LABEL: floor_mask_ss_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
-; AVX512VL-NEXT:    kmovw %k0, %k1
+; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
 ; AVX512VL-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
@@ -1755,16 +1753,14 @@ define <4 x float> @floor_maskz_ss_mask8
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
-; AVX512F-NEXT:    kmovw %k0, %k1
+; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
 ; AVX512F-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: floor_maskz_ss_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
-; AVX512VL-NEXT:    kmovw %k0, %k1
+; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
 ; AVX512VL-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
@@ -1808,8 +1804,7 @@ define <2 x double> @floor_mask_sd_mask8
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
-; AVX512F-NEXT:    kmovw %k0, %k1
+; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
 ; AVX512F-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -1817,8 +1812,7 @@ define <2 x double> @floor_mask_sd_mask8
 ;
 ; AVX512VL-LABEL: floor_mask_sd_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
-; AVX512VL-NEXT:    kmovw %k0, %k1
+; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1
 ; AVX512VL-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
@@ -1869,16 +1863,14 @@ define <2 x double> @floor_maskz_sd_mask
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
-; AVX512F-NEXT:    kmovw %k0, %k1
+; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
 ; AVX512F-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: floor_maskz_sd_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
-; AVX512VL-NEXT:    kmovw %k0, %k1
+; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1
 ; AVX512VL-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
@@ -2766,8 +2758,7 @@ define <4 x float> @ceil_mask_ss_mask8(<
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
-; AVX512F-NEXT:    kmovw %k0, %k1
+; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
 ; AVX512F-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -2775,8 +2766,7 @@ define <4 x float> @ceil_mask_ss_mask8(<
 ;
 ; AVX512VL-LABEL: ceil_mask_ss_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
-; AVX512VL-NEXT:    kmovw %k0, %k1
+; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
 ; AVX512VL-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
@@ -2827,16 +2817,14 @@ define <4 x float> @ceil_maskz_ss_mask8(
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
-; AVX512F-NEXT:    kmovw %k0, %k1
+; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
 ; AVX512F-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: ceil_maskz_ss_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
-; AVX512VL-NEXT:    kmovw %k0, %k1
+; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
 ; AVX512VL-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
@@ -2880,8 +2868,7 @@ define <2 x double> @ceil_mask_sd_mask8(
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
-; AVX512F-NEXT:    kmovw %k0, %k1
+; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
 ; AVX512F-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -2889,8 +2876,7 @@ define <2 x double> @ceil_mask_sd_mask8(
 ;
 ; AVX512VL-LABEL: ceil_mask_sd_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
-; AVX512VL-NEXT:    kmovw %k0, %k1
+; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1
 ; AVX512VL-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
@@ -2941,16 +2927,14 @@ define <2 x double> @ceil_maskz_sd_mask8
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
-; AVX512F-NEXT:    kmovw %k0, %k1
+; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
 ; AVX512F-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: ceil_maskz_sd_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
-; AVX512VL-NEXT:    kmovw %k0, %k1
+; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1
 ; AVX512VL-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
