[llvm] r359666 - [X86][SSE] Extract i1 elements from vXi1 bool vectors

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed May 1 03:02:23 PDT 2019


Author: rksimon
Date: Wed May  1 03:02:22 2019
New Revision: 359666

URL: http://llvm.org/viewvc/llvm-project?rev=359666&view=rev
Log:
[X86][SSE] Extract i1 elements from vXi1 bool vectors

This is an alternative to D59669, which more aggressively extracts i1 elements from vXi1 bool vectors using a MOVMSK.

Differential Revision: https://reviews.llvm.org/D61189

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll
    llvm/trunk/test/CodeGen/X86/bool-vector.ll
    llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=359666&r1=359665&r2=359666&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May  1 03:02:22 2019
@@ -34909,6 +34909,39 @@ static SDValue combineExtractVectorElt(S
   if (SDValue V = scalarizeExtEltFP(N, DAG))
     return V;
 
+  // Attempt to extract a i1 element by using MOVMSK to extract the signbits
+  // and then testing the relevant element.
+  if (CIdx && SrcVT.getScalarType() == MVT::i1) {
+    SmallVector<SDNode *, 16> BoolExtracts;
+    auto IsBoolExtract = [&BoolExtracts](SDNode *Use) {
+      if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          isa<ConstantSDNode>(Use->getOperand(1)) &&
+          Use->getValueType(0) == MVT::i1) {
+        BoolExtracts.push_back(Use);
+        return true;
+      }
+      return false;
+    };
+    if (all_of(InputVector->uses(), IsBoolExtract) &&
+        BoolExtracts.size() > 1) {
+      unsigned NumSrcElts = SrcVT.getVectorNumElements();
+      EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
+      if (SDValue BC =
+              combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
+        for (SDNode *Use : BoolExtracts) {
+          // extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask
+          unsigned MaskIdx = Use->getConstantOperandVal(1);
+          APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx);
+          SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT);
+          SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
+          Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
+          DCI.CombineTo(Use, Res);
+        }
+        return SDValue(N, 0);
+      }
+    }
+  }
+
   return SDValue();
 }
 

Modified: llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll?rev=359666&r1=359665&r2=359666&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll Wed May  1 03:02:22 2019
@@ -12,29 +12,18 @@
 define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
 ; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
 ; SSE2-SSSE3:       # %bb.0:
-; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
-; SSE2-SSSE3-NEXT:    pxor %xmm1, %xmm0
-; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
-; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
-; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
-; SSE2-SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-SSSE3-NEXT:    movb -{{[0-9]+}}(%rsp), %al
-; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %ecx
+; SSE2-SSSE3-NEXT:    movl %ecx, %eax
+; SSE2-SSSE3-NEXT:    shrb %al
+; SSE2-SSSE3-NEXT:    addb %cl, %al
 ; SSE2-SSSE3-NEXT:    retq
 ;
 ; AVX12-LABEL: bitcast_v2i64_to_v2i1:
 ; AVX12:       # %bb.0:
-; AVX12-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX12-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX12-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX12-NEXT:    vpextrb $8, %xmm0, %eax
+; AVX12-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX12-NEXT:    movl %ecx, %eax
+; AVX12-NEXT:    shrb %al
 ; AVX12-NEXT:    addb %cl, %al
-; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    retq
 ;
 ; AVX512-LABEL: bitcast_v2i64_to_v2i1:

Modified: llvm/trunk/test/CodeGen/X86/bool-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bool-vector.ll?rev=359666&r1=359665&r2=359666&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bool-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bool-vector.ll Wed May  1 03:02:22 2019
@@ -94,45 +94,14 @@ define i32 @PR15215_good(<4 x i32> %inpu
 ;
 ; X32-SSE2-LABEL: PR15215_good:
 ; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    pushl %esi
-; X32-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X32-SSE2-NEXT:    .cfi_offset %esi, -8
-; X32-SSE2-NEXT:    movd %xmm0, %eax
-; X32-SSE2-NEXT:    andl $1, %eax
-; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X32-SSE2-NEXT:    movd %xmm1, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT:    movd %xmm1, %edx
-; X32-SSE2-NEXT:    andl $1, %edx
-; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X32-SSE2-NEXT:    movd %xmm0, %esi
-; X32-SSE2-NEXT:    andl $1, %esi
-; X32-SSE2-NEXT:    leal (%eax,%ecx,2), %eax
-; X32-SSE2-NEXT:    leal (%eax,%edx,4), %eax
-; X32-SSE2-NEXT:    leal (%eax,%esi,8), %eax
-; X32-SSE2-NEXT:    popl %esi
-; X32-SSE2-NEXT:    .cfi_def_cfa_offset 4
+; X32-SSE2-NEXT:    pslld $31, %xmm0
+; X32-SSE2-NEXT:    movmskps %xmm0, %eax
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-AVX2-LABEL: PR15215_good:
 ; X32-AVX2:       # %bb.0: # %entry
-; X32-AVX2-NEXT:    pushl %esi
-; X32-AVX2-NEXT:    .cfi_def_cfa_offset 8
-; X32-AVX2-NEXT:    .cfi_offset %esi, -8
-; X32-AVX2-NEXT:    vmovd %xmm0, %eax
-; X32-AVX2-NEXT:    andl $1, %eax
-; X32-AVX2-NEXT:    vpextrd $1, %xmm0, %ecx
-; X32-AVX2-NEXT:    andl $1, %ecx
-; X32-AVX2-NEXT:    vpextrd $2, %xmm0, %edx
-; X32-AVX2-NEXT:    andl $1, %edx
-; X32-AVX2-NEXT:    vpextrd $3, %xmm0, %esi
-; X32-AVX2-NEXT:    andl $1, %esi
-; X32-AVX2-NEXT:    leal (%eax,%ecx,2), %eax
-; X32-AVX2-NEXT:    leal (%eax,%edx,4), %eax
-; X32-AVX2-NEXT:    leal (%eax,%esi,8), %eax
-; X32-AVX2-NEXT:    popl %esi
-; X32-AVX2-NEXT:    .cfi_def_cfa_offset 4
+; X32-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
+; X32-AVX2-NEXT:    vmovmskps %xmm0, %eax
 ; X32-AVX2-NEXT:    retl
 ;
 ; X64-LABEL: PR15215_good:
@@ -152,35 +121,14 @@ define i32 @PR15215_good(<4 x i32> %inpu
 ;
 ; X64-SSE2-LABEL: PR15215_good:
 ; X64-SSE2:       # %bb.0: # %entry
-; X64-SSE2-NEXT:    movd %xmm0, %eax
-; X64-SSE2-NEXT:    andl $1, %eax
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT:    movd %xmm1, %ecx
-; X64-SSE2-NEXT:    andl $1, %ecx
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE2-NEXT:    movd %xmm1, %edx
-; X64-SSE2-NEXT:    andl $1, %edx
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X64-SSE2-NEXT:    movd %xmm0, %esi
-; X64-SSE2-NEXT:    andl $1, %esi
-; X64-SSE2-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-SSE2-NEXT:    leal (%rax,%rdx,4), %eax
-; X64-SSE2-NEXT:    leal (%rax,%rsi,8), %eax
+; X64-SSE2-NEXT:    pslld $31, %xmm0
+; X64-SSE2-NEXT:    movmskps %xmm0, %eax
 ; X64-SSE2-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: PR15215_good:
 ; X64-AVX2:       # %bb.0: # %entry
-; X64-AVX2-NEXT:    vmovd %xmm0, %eax
-; X64-AVX2-NEXT:    andl $1, %eax
-; X64-AVX2-NEXT:    vpextrd $1, %xmm0, %ecx
-; X64-AVX2-NEXT:    andl $1, %ecx
-; X64-AVX2-NEXT:    vpextrd $2, %xmm0, %edx
-; X64-AVX2-NEXT:    andl $1, %edx
-; X64-AVX2-NEXT:    vpextrd $3, %xmm0, %esi
-; X64-AVX2-NEXT:    andl $1, %esi
-; X64-AVX2-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-AVX2-NEXT:    leal (%rax,%rdx,4), %eax
-; X64-AVX2-NEXT:    leal (%rax,%rsi,8), %eax
+; X64-AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vmovmskps %xmm0, %eax
 ; X64-AVX2-NEXT:    retq
 entry:
   %0 = trunc <4 x i32> %input to <4 x i1>

Modified: llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll?rev=359666&r1=359665&r2=359666&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll Wed May  1 03:02:22 2019
@@ -4273,19 +4273,31 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <1
 ; SSE2-LABEL: movmsk_v16i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT:    xorb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT:    andb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $15, %ecx
+; SSE2-NEXT:    movl %eax, %edx
+; SSE2-NEXT:    shrl $8, %edx
+; SSE2-NEXT:    andl $1, %edx
+; SSE2-NEXT:    andl $8, %eax
+; SSE2-NEXT:    shrl $3, %eax
+; SSE2-NEXT:    xorl %edx, %eax
+; SSE2-NEXT:    andl %ecx, %eax
+; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_v16i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpextrb $3, %xmm0, %eax
-; AVX-NEXT:    vpextrb $8, %xmm0, %ecx
-; AVX-NEXT:    xorl %eax, %ecx
-; AVX-NEXT:    vpextrb $15, %xmm0, %eax
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    movl %eax, %ecx
+; AVX-NEXT:    shrl $15, %ecx
+; AVX-NEXT:    movl %eax, %edx
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    andl $1, %edx
+; AVX-NEXT:    andl $8, %eax
+; AVX-NEXT:    shrl $3, %eax
+; AVX-NEXT:    xorl %edx, %eax
 ; AVX-NEXT:    andl %ecx, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
@@ -4329,31 +4341,40 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <1
   ret i1 %u2
 }
 
+; TODO: Replace shift+mask chain with NOT+TEST+SETE
 define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SSE2-LABEL: movmsk_v8i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    pextrw $1, %xmm0, %edx
-; SSE2-NEXT:    pextrw $7, %xmm0, %esi
-; SSE2-NEXT:    pextrw $4, %xmm0, %eax
-; SSE2-NEXT:    andl %esi, %eax
-; SSE2-NEXT:    andl %edx, %eax
-; SSE2-NEXT:    andl %ecx, %eax
-; SSE2-NEXT:    # kill: def $al killed $al killed $eax
+; SSE2-NEXT:    packsswb %xmm0, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %ecx
+; SSE2-NEXT:    movl %ecx, %eax
+; SSE2-NEXT:    shrb $7, %al
+; SSE2-NEXT:    movl %ecx, %edx
+; SSE2-NEXT:    andb $16, %dl
+; SSE2-NEXT:    shrb $4, %dl
+; SSE2-NEXT:    andb %al, %dl
+; SSE2-NEXT:    movl %ecx, %eax
+; SSE2-NEXT:    shrb %al
+; SSE2-NEXT:    andb %dl, %al
+; SSE2-NEXT:    andb %cl, %al
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_v8i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovd %xmm0, %ecx
-; AVX-NEXT:    vpextrw $1, %xmm0, %edx
-; AVX-NEXT:    vpextrw $7, %xmm0, %esi
-; AVX-NEXT:    vpextrw $4, %xmm0, %eax
-; AVX-NEXT:    andl %esi, %eax
-; AVX-NEXT:    andl %edx, %eax
-; AVX-NEXT:    andl %ecx, %eax
-; AVX-NEXT:    # kill: def $al killed $al killed $eax
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX-NEXT:    movl %ecx, %eax
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    movl %ecx, %edx
+; AVX-NEXT:    andb $16, %dl
+; AVX-NEXT:    shrb $4, %dl
+; AVX-NEXT:    andb %al, %dl
+; AVX-NEXT:    movl %ecx, %eax
+; AVX-NEXT:    shrb %al
+; AVX-NEXT:    andb %dl, %al
+; AVX-NEXT:    andb %cl, %al
 ; AVX-NEXT:    retq
 ;
 ; KNL-LABEL: movmsk_v8i16:
@@ -4401,24 +4422,29 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8
   ret i1 %u3
 }
 
+; TODO: Replace shift+mask chain with AND+CMP.
 define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE2-LABEL: movmsk_v4i32:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    xorl %ecx, %eax
+; SSE2-NEXT:    movmskps %xmm1, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrb $3, %cl
+; SSE2-NEXT:    andb $4, %al
+; SSE2-NEXT:    shrb $2, %al
+; SSE2-NEXT:    xorb %cl, %al
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_v4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpextrd $2, %xmm0, %ecx
-; AVX-NEXT:    vpextrd $3, %xmm0, %eax
-; AVX-NEXT:    xorl %ecx, %eax
+; AVX-NEXT:    vmovmskps %xmm0, %eax
+; AVX-NEXT:    movl %eax, %ecx
+; AVX-NEXT:    shrb $3, %cl
+; AVX-NEXT:    andb $4, %al
+; AVX-NEXT:    shrb $2, %al
+; AVX-NEXT:    xorb %cl, %al
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
 ;
@@ -4461,11 +4487,10 @@ define i1 @movmsk_v2i64(<2 x i64> %x, <2
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    andl %ecx, %eax
-; SSE2-NEXT:    # kill: def $al killed $al killed $eax
+; SSE2-NEXT:    movmskpd %xmm0, %ecx
+; SSE2-NEXT:    movl %ecx, %eax
+; SSE2-NEXT:    shrb %al
+; SSE2-NEXT:    andb %cl, %al
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_v2i64:
@@ -4473,10 +4498,10 @@ define i1 @movmsk_v2i64(<2 x i64> %x, <2
 ; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpextrd $2, %xmm0, %ecx
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    andl %ecx, %eax
-; AVX-NEXT:    # kill: def $al killed $al killed $eax
+; AVX-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX-NEXT:    movl %ecx, %eax
+; AVX-NEXT:    shrb %al
+; AVX-NEXT:    andb %cl, %al
 ; AVX-NEXT:    retq
 ;
 ; KNL-LABEL: movmsk_v2i64:
@@ -4515,25 +4540,17 @@ define i1 @movmsk_v4f32(<4 x float> %x,
 ; SSE2-NEXT:    cmpeqps %xmm1, %xmm2
 ; SSE2-NEXT:    cmpunordps %xmm1, %xmm0
 ; SSE2-NEXT:    orps %xmm2, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT:    movd %xmm1, %ecx
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT:    movd %xmm1, %edx
-; SSE2-NEXT:    pextrw $6, %xmm0, %eax
-; SSE2-NEXT:    orl %edx, %eax
-; SSE2-NEXT:    orl %ecx, %eax
-; SSE2-NEXT:    # kill: def $al killed $al killed $eax
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    testb $14, %al
+; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_v4f32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpeq_uqps %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vextractps $1, %xmm0, %ecx
-; AVX-NEXT:    vextractps $2, %xmm0, %edx
-; AVX-NEXT:    vpextrb $12, %xmm0, %eax
-; AVX-NEXT:    orl %edx, %eax
-; AVX-NEXT:    orl %ecx, %eax
-; AVX-NEXT:    # kill: def $al killed $al killed $eax
+; AVX-NEXT:    vmovmskps %xmm0, %eax
+; AVX-NEXT:    testb $14, %al
+; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
 ; KNL-LABEL: movmsk_v4f32:
@@ -4579,20 +4596,19 @@ define i1 @movmsk_v2f64(<2 x double> %x,
 ; SSE2-LABEL: movmsk_v2f64:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    cmplepd %xmm0, %xmm1
-; SSE2-NEXT:    movd %xmm1, %ecx
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    andl %ecx, %eax
-; SSE2-NEXT:    # kill: def $al killed $al killed $eax
+; SSE2-NEXT:    movmskpd %xmm1, %ecx
+; SSE2-NEXT:    movl %ecx, %eax
+; SSE2-NEXT:    shrb %al
+; SSE2-NEXT:    andb %cl, %al
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_v2f64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vextractps $2, %xmm0, %ecx
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    andl %ecx, %eax
-; AVX-NEXT:    # kill: def $al killed $al killed $eax
+; AVX-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX-NEXT:    movl %ecx, %eax
+; AVX-NEXT:    shrb %al
+; AVX-NEXT:    andb %cl, %al
 ; AVX-NEXT:    retq
 ;
 ; KNL-LABEL: movmsk_v2f64:
@@ -4628,26 +4644,21 @@ define i32 @PR39665_c_ray(<2 x double> %
 ; SSE2-LABEL: PR39665_c_ray:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    cmpltpd %xmm0, %xmm1
-; SSE2-NEXT:    movapd %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    testb $1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movl $42, %eax
-; SSE2-NEXT:    movl $99, %ecx
-; SSE2-NEXT:    cmovel %ecx, %eax
-; SSE2-NEXT:    testb $1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    cmpb $3, %al
+; SSE2-NEXT:    movl $42, %ecx
+; SSE2-NEXT:    movl $99, %eax
 ; SSE2-NEXT:    cmovel %ecx, %eax
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: PR39665_c_ray:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpextrb $0, %xmm0, %ecx
-; AVX-NEXT:    vpextrb $8, %xmm0, %eax
-; AVX-NEXT:    testb $1, %al
-; AVX-NEXT:    movl $42, %eax
-; AVX-NEXT:    movl $99, %edx
-; AVX-NEXT:    cmovel %edx, %eax
-; AVX-NEXT:    testb $1, %cl
-; AVX-NEXT:    cmovel %edx, %eax
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    cmpb $3, %al
+; AVX-NEXT:    movl $42, %ecx
+; AVX-NEXT:    movl $99, %eax
+; AVX-NEXT:    cmovel %ecx, %eax
 ; AVX-NEXT:    retq
 ;
 ; KNL-LABEL: PR39665_c_ray:




More information about the llvm-commits mailing list