[llvm] r359666 - [X86][SSE] Extract i1 elements from vXi1 bool vectors
Author: rksimon
Date: Wed May 1 03:02:22 2019
New Revision: 359666
URL: http://llvm.org/viewvc/llvm-project?rev=359666&view=rev
Log:
[X86][SSE] Extract i1 elements from vXi1 bool vectors
This is an alternative to D59669 that more aggressively extracts i1 elements from vXi1 bool vectors by using MOVMSK.
Differential Revision: https://reviews.llvm.org/D61189
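To illustrate the equivalence the combine relies on, here is a minimal standalone C++ sketch (not part of the patch; the helper name and test values are made up): extracting a constant-index element of a vXi1 compare result is the same as testing the corresponding bit of the MOVMSK of that compare.

#include <immintrin.h>
#include <cassert>

// extractelement <4 x i1> (icmp sgt %x, %y), MaskIdx
//   --> ((movmskps (pcmpgtd %x, %y)) & (1 << MaskIdx)) != 0
// The patch emits the equivalent ((movmsk X) & Mask) == Mask form, which is the
// same test when Mask has a single bit set.
static bool extract_bool_via_movmsk(__m128i x, __m128i y, unsigned MaskIdx) {
  __m128i cmp = _mm_cmpgt_epi32(x, y);                // vXi1 mask, all-ones/all-zeros lanes
  int mask = _mm_movemask_ps(_mm_castsi128_ps(cmp));  // one sign bit per 32-bit lane
  return (mask & (1u << MaskIdx)) != 0;               // test the requested element's bit
}

int main() {
  __m128i x = _mm_setr_epi32(5, -1, 7, 0);
  __m128i y = _mm_setr_epi32(3,  2, 9, 0);
  // Lane-by-lane x > y is {1, 0, 0, 0}.
  assert(extract_bool_via_movmsk(x, y, 0));
  assert(!extract_bool_via_movmsk(x, y, 1));
  assert(!extract_bool_via_movmsk(x, y, 2));
  return 0;
}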
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll
llvm/trunk/test/CodeGen/X86/bool-vector.ll
llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=359666&r1=359665&r2=359666&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 1 03:02:22 2019
@@ -34909,6 +34909,39 @@ static SDValue combineExtractVectorElt(S
   if (SDValue V = scalarizeExtEltFP(N, DAG))
     return V;
+  // Attempt to extract an i1 element by using MOVMSK to extract the sign bits
+  // and then testing the relevant element.
+  if (CIdx && SrcVT.getScalarType() == MVT::i1) {
+    SmallVector<SDNode *, 16> BoolExtracts;
+    auto IsBoolExtract = [&BoolExtracts](SDNode *Use) {
+      if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+          isa<ConstantSDNode>(Use->getOperand(1)) &&
+          Use->getValueType(0) == MVT::i1) {
+        BoolExtracts.push_back(Use);
+        return true;
+      }
+      return false;
+    };
+    if (all_of(InputVector->uses(), IsBoolExtract) &&
+        BoolExtracts.size() > 1) {
+      unsigned NumSrcElts = SrcVT.getVectorNumElements();
+      EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
+      if (SDValue BC =
+              combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
+        for (SDNode *Use : BoolExtracts) {
+          // extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask
+          unsigned MaskIdx = Use->getConstantOperandVal(1);
+          APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx);
+          SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT);
+          SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
+          Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
+          DCI.CombineTo(Use, Res);
+        }
+        return SDValue(N, 0);
+      }
+    }
+  }
+
   return SDValue();
 }
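As a rough illustration of why the combine requires every use of the vXi1 source to be a constant-index i1 extract (and more than one of them): all of those extracts can then be rewritten against a single shared MOVMSK value, each as the patch's ((movmsk X) & Mask) == Mask test. The sketch below is illustrative only; the values and loop are not from LLVM.

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 b = _mm_setr_ps(4.0f, 2.0f, 1.0f, 4.0f);
  __m128 cmp = _mm_cmpeq_ps(a, b);   // <4 x i1> compare result, one lane per element
  int bc = _mm_movemask_ps(cmp);     // the single MOVMSK shared by every extract below

  for (unsigned MaskIdx = 0; MaskIdx != 4; ++MaskIdx) {
    int Mask = 1 << MaskIdx;         // APInt::getOneBitSet(NumSrcElts, MaskIdx)
    bool Elt = (bc & Mask) == Mask;  // AND + SETEQ replacing each i1 extract
    std::printf("element %u = %d\n", MaskIdx, (int)Elt);
  }
  return 0;
}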
Modified: llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll?rev=359666&r1=359665&r2=359666&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-vector-bool.ll Wed May 1 03:02:22 2019
@@ -12,29 +12,18 @@
define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
-; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: movmskpd %xmm0, %ecx
+; SSE2-SSSE3-NEXT: movl %ecx, %eax
+; SSE2-SSSE3-NEXT: shrb %al
+; SSE2-SSSE3-NEXT: addb %cl, %al
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12: # %bb.0:
-; AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX12-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX12-NEXT: vpextrb $8, %xmm0, %eax
+; AVX12-NEXT: vmovmskpd %xmm0, %ecx
+; AVX12-NEXT: movl %ecx, %eax
+; AVX12-NEXT: shrb %al
; AVX12-NEXT: addb %cl, %al
-; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
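The bitcast_v2i64_to_v2i1 diff above shows the intended effect: instead of spilling the compare to the stack (SSE2) or doing two PEXTRBs (AVX), both i1 elements come out of one MOVMSKPD. A hedged C++ sketch of what the new assembly computes (the helper name and inputs are made up; the i1 add wraps modulo 2, matching the byte add into %al):

#include <immintrin.h>
#include <cassert>

static unsigned char add_v2i1_elements(__m128d v) {
  int mask = _mm_movemask_pd(v);       // movmskpd: bit 0 = element 0, bit 1 = element 1
  unsigned char e0 = mask & 1;         // extractelement <2 x i1> %m, 0
  unsigned char e1 = (mask >> 1) & 1;  // extractelement <2 x i1> %m, 1  (shrb %al)
  return (e0 + e1) & 1;                // add i1 %e0, %e1  (addb %cl, %al, low bit only)
}

int main() {
  // Sign bits per lane are {1, 0}, so the two i1 elements add to 1.
  __m128d v = _mm_setr_pd(-1.0, 1.0);
  assert(add_v2i1_elements(v) == 1);
  return 0;
}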
Modified: llvm/trunk/test/CodeGen/X86/bool-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bool-vector.ll?rev=359666&r1=359665&r2=359666&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bool-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bool-vector.ll Wed May 1 03:02:22 2019
@@ -94,45 +94,14 @@ define i32 @PR15215_good(<4 x i32> %inpu
;
; X32-SSE2-LABEL: PR15215_good:
; X32-SSE2: # %bb.0: # %entry
-; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X32-SSE2-NEXT: .cfi_offset %esi, -8
-; X32-SSE2-NEXT: movd %xmm0, %eax
-; X32-SSE2-NEXT: andl $1, %eax
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X32-SSE2-NEXT: movd %xmm1, %ecx
-; X32-SSE2-NEXT: andl $1, %ecx
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: movd %xmm1, %edx
-; X32-SSE2-NEXT: andl $1, %edx
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X32-SSE2-NEXT: movd %xmm0, %esi
-; X32-SSE2-NEXT: andl $1, %esi
-; X32-SSE2-NEXT: leal (%eax,%ecx,2), %eax
-; X32-SSE2-NEXT: leal (%eax,%edx,4), %eax
-; X32-SSE2-NEXT: leal (%eax,%esi,8), %eax
-; X32-SSE2-NEXT: popl %esi
-; X32-SSE2-NEXT: .cfi_def_cfa_offset 4
+; X32-SSE2-NEXT: pslld $31, %xmm0
+; X32-SSE2-NEXT: movmskps %xmm0, %eax
; X32-SSE2-NEXT: retl
;
; X32-AVX2-LABEL: PR15215_good:
; X32-AVX2: # %bb.0: # %entry
-; X32-AVX2-NEXT: pushl %esi
-; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
-; X32-AVX2-NEXT: .cfi_offset %esi, -8
-; X32-AVX2-NEXT: vmovd %xmm0, %eax
-; X32-AVX2-NEXT: andl $1, %eax
-; X32-AVX2-NEXT: vpextrd $1, %xmm0, %ecx
-; X32-AVX2-NEXT: andl $1, %ecx
-; X32-AVX2-NEXT: vpextrd $2, %xmm0, %edx
-; X32-AVX2-NEXT: andl $1, %edx
-; X32-AVX2-NEXT: vpextrd $3, %xmm0, %esi
-; X32-AVX2-NEXT: andl $1, %esi
-; X32-AVX2-NEXT: leal (%eax,%ecx,2), %eax
-; X32-AVX2-NEXT: leal (%eax,%edx,4), %eax
-; X32-AVX2-NEXT: leal (%eax,%esi,8), %eax
-; X32-AVX2-NEXT: popl %esi
-; X32-AVX2-NEXT: .cfi_def_cfa_offset 4
+; X32-AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; X32-AVX2-NEXT: vmovmskps %xmm0, %eax
; X32-AVX2-NEXT: retl
;
; X64-LABEL: PR15215_good:
@@ -152,35 +121,14 @@ define i32 @PR15215_good(<4 x i32> %inpu
;
; X64-SSE2-LABEL: PR15215_good:
; X64-SSE2: # %bb.0: # %entry
-; X64-SSE2-NEXT: movd %xmm0, %eax
-; X64-SSE2-NEXT: andl $1, %eax
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: movd %xmm1, %ecx
-; X64-SSE2-NEXT: andl $1, %ecx
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE2-NEXT: movd %xmm1, %edx
-; X64-SSE2-NEXT: andl $1, %edx
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X64-SSE2-NEXT: movd %xmm0, %esi
-; X64-SSE2-NEXT: andl $1, %esi
-; X64-SSE2-NEXT: leal (%rax,%rcx,2), %eax
-; X64-SSE2-NEXT: leal (%rax,%rdx,4), %eax
-; X64-SSE2-NEXT: leal (%rax,%rsi,8), %eax
+; X64-SSE2-NEXT: pslld $31, %xmm0
+; X64-SSE2-NEXT: movmskps %xmm0, %eax
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: PR15215_good:
; X64-AVX2: # %bb.0: # %entry
-; X64-AVX2-NEXT: vmovd %xmm0, %eax
-; X64-AVX2-NEXT: andl $1, %eax
-; X64-AVX2-NEXT: vpextrd $1, %xmm0, %ecx
-; X64-AVX2-NEXT: andl $1, %ecx
-; X64-AVX2-NEXT: vpextrd $2, %xmm0, %edx
-; X64-AVX2-NEXT: andl $1, %edx
-; X64-AVX2-NEXT: vpextrd $3, %xmm0, %esi
-; X64-AVX2-NEXT: andl $1, %esi
-; X64-AVX2-NEXT: leal (%rax,%rcx,2), %eax
-; X64-AVX2-NEXT: leal (%rax,%rdx,4), %eax
-; X64-AVX2-NEXT: leal (%rax,%rsi,8), %eax
+; X64-AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovmskps %xmm0, %eax
; X64-AVX2-NEXT: retq
entry:
%0 = trunc <4 x i32> %input to <4 x i1>
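For PR15215_good the per-lane i1 is bit 0 of each i32, so the whole extract/AND/LEA chain reduces to PSLLD $31 (move bit 0 into the sign bit) plus one MOVMSKPS. A small hedged sketch of that reduction (the helper name and test vector are illustrative only):

#include <immintrin.h>
#include <cassert>

static int pack_low_bits(__m128i input) {
  __m128i shifted = _mm_slli_epi32(input, 31);        // pslld $31, %xmm0
  return _mm_movemask_ps(_mm_castsi128_ps(shifted));  // movmskps %xmm0, %eax
}

int main() {
  // Low bits per lane are {1, 0, 1, 1}, so the packed result is 0b1101 = 13.
  __m128i v = _mm_setr_epi32(1, 2, 3, 7);
  assert(pack_low_bits(v) == 13);
  return 0;
}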
Modified: llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll?rev=359666&r1=359665&r2=359666&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movmsk-cmp.ll Wed May 1 03:02:22 2019
@@ -4273,19 +4273,31 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <1
; SSE2-LABEL: movmsk_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT: xorb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT: andb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $15, %ecx
+; SSE2-NEXT: movl %eax, %edx
+; SSE2-NEXT: shrl $8, %edx
+; SSE2-NEXT: andl $1, %edx
+; SSE2-NEXT: andl $8, %eax
+; SSE2-NEXT: shrl $3, %eax
+; SSE2-NEXT: xorl %edx, %eax
+; SSE2-NEXT: andl %ecx, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $3, %xmm0, %eax
-; AVX-NEXT: vpextrb $8, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %ecx
-; AVX-NEXT: vpextrb $15, %xmm0, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: shrl $15, %ecx
+; AVX-NEXT: movl %eax, %edx
+; AVX-NEXT: shrl $8, %edx
+; AVX-NEXT: andl $1, %edx
+; AVX-NEXT: andl $8, %eax
+; AVX-NEXT: shrl $3, %eax
+; AVX-NEXT: xorl %edx, %eax
; AVX-NEXT: andl %ecx, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
@@ -4329,31 +4341,40 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <1
ret i1 %u2
}
+; TODO: Replace shift+mask chain with NOT+TEST+SETE
define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: movmsk_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: pextrw $1, %xmm0, %edx
-; SSE2-NEXT: pextrw $7, %xmm0, %esi
-; SSE2-NEXT: pextrw $4, %xmm0, %eax
-; SSE2-NEXT: andl %esi, %eax
-; SSE2-NEXT: andl %edx, %eax
-; SSE2-NEXT: andl %ecx, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: packsswb %xmm0, %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %ecx
+; SSE2-NEXT: movl %ecx, %eax
+; SSE2-NEXT: shrb $7, %al
+; SSE2-NEXT: movl %ecx, %edx
+; SSE2-NEXT: andb $16, %dl
+; SSE2-NEXT: shrb $4, %dl
+; SSE2-NEXT: andb %al, %dl
+; SSE2-NEXT: movl %ecx, %eax
+; SSE2-NEXT: shrb %al
+; SSE2-NEXT: andb %dl, %al
+; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %ecx
-; AVX-NEXT: vpextrw $1, %xmm0, %edx
-; AVX-NEXT: vpextrw $7, %xmm0, %esi
-; AVX-NEXT: vpextrw $4, %xmm0, %eax
-; AVX-NEXT: andl %esi, %eax
-; AVX-NEXT: andl %edx, %eax
-; AVX-NEXT: andl %ecx, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: movl %ecx, %eax
+; AVX-NEXT: shrb $7, %al
+; AVX-NEXT: movl %ecx, %edx
+; AVX-NEXT: andb $16, %dl
+; AVX-NEXT: shrb $4, %dl
+; AVX-NEXT: andb %al, %dl
+; AVX-NEXT: movl %ecx, %eax
+; AVX-NEXT: shrb %al
+; AVX-NEXT: andb %dl, %al
+; AVX-NEXT: andb %cl, %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_v8i16:
@@ -4401,24 +4422,29 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8
ret i1 %u3
}
+; TODO: Replace shift+mask chain with AND+CMP.
define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: movmsk_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: xorl %ecx, %eax
+; SSE2-NEXT: movmskps %xmm1, %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrb $3, %cl
+; SSE2-NEXT: andb $4, %al
+; SSE2-NEXT: shrb $2, %al
+; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: xorl %ecx, %eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: shrb $3, %cl
+; AVX-NEXT: andb $4, %al
+; AVX-NEXT: shrb $2, %al
+; AVX-NEXT: xorb %cl, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
@@ -4461,11 +4487,10 @@ define i1 @movmsk_v2i64(<2 x i64> %x, <2
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: andl %ecx, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: movmskpd %xmm0, %ecx
+; SSE2-NEXT: movl %ecx, %eax
+; SSE2-NEXT: shrb %al
+; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v2i64:
@@ -4473,10 +4498,10 @@ define i1 @movmsk_v2i64(<2 x i64> %x, <2
; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: andl %ecx, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: movl %ecx, %eax
+; AVX-NEXT: shrb %al
+; AVX-NEXT: andb %cl, %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_v2i64:
@@ -4515,25 +4540,17 @@ define i1 @movmsk_v4f32(<4 x float> %x,
; SSE2-NEXT: cmpeqps %xmm1, %xmm2
; SSE2-NEXT: cmpunordps %xmm1, %xmm0
; SSE2-NEXT: orps %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %edx
-; SSE2-NEXT: pextrw $6, %xmm0, %eax
-; SSE2-NEXT: orl %edx, %eax
-; SSE2-NEXT: orl %ecx, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: movmskps %xmm0, %eax
+; SSE2-NEXT: testb $14, %al
+; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vextractps $1, %xmm0, %ecx
-; AVX-NEXT: vextractps $2, %xmm0, %edx
-; AVX-NEXT: vpextrb $12, %xmm0, %eax
-; AVX-NEXT: orl %edx, %eax
-; AVX-NEXT: orl %ecx, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: testb $14, %al
+; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_v4f32:
@@ -4579,20 +4596,19 @@ define i1 @movmsk_v2f64(<2 x double> %x,
; SSE2-LABEL: movmsk_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cmplepd %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: andl %ecx, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: movmskpd %xmm1, %ecx
+; SSE2-NEXT: movl %ecx, %eax
+; SSE2-NEXT: shrb %al
+; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vextractps $2, %xmm0, %ecx
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: andl %ecx, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: movl %ecx, %eax
+; AVX-NEXT: shrb %al
+; AVX-NEXT: andb %cl, %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_v2f64:
@@ -4628,26 +4644,21 @@ define i32 @PR39665_c_ray(<2 x double> %
; SSE2-LABEL: PR39665_c_ray:
; SSE2: # %bb.0:
; SSE2-NEXT: cmpltpd %xmm0, %xmm1
-; SSE2-NEXT: movapd %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: testb $1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movl $42, %eax
-; SSE2-NEXT: movl $99, %ecx
-; SSE2-NEXT: cmovel %ecx, %eax
-; SSE2-NEXT: testb $1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT: movmskpd %xmm1, %eax
+; SSE2-NEXT: cmpb $3, %al
+; SSE2-NEXT: movl $42, %ecx
+; SSE2-NEXT: movl $99, %eax
; SSE2-NEXT: cmovel %ecx, %eax
; SSE2-NEXT: retq
;
; AVX-LABEL: PR39665_c_ray:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX-NEXT: vpextrb $8, %xmm0, %eax
-; AVX-NEXT: testb $1, %al
-; AVX-NEXT: movl $42, %eax
-; AVX-NEXT: movl $99, %edx
-; AVX-NEXT: cmovel %edx, %eax
-; AVX-NEXT: testb $1, %cl
-; AVX-NEXT: cmovel %edx, %eax
+; AVX-NEXT: vmovmskpd %xmm0, %eax
+; AVX-NEXT: cmpb $3, %al
+; AVX-NEXT: movl $42, %ecx
+; AVX-NEXT: movl $99, %eax
+; AVX-NEXT: cmovel %ecx, %eax
; AVX-NEXT: retq
;
; KNL-LABEL: PR39665_c_ray:
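One more illustration, this time of the movmsk_v4f32 case above, where OR-ing i1 elements 1-3 of the compare collapses into a single multi-bit test of the MOVMSKPS value (testb $14 checks bits 1, 2 and 3 at once). The sketch uses a plain equality compare rather than the test's eq_uq predicate, and the helper name is made up:

#include <immintrin.h>
#include <cassert>

static bool any_of_lanes_1_to_3(__m128 x, __m128 y) {
  __m128 cmp = _mm_cmpeq_ps(x, y);   // <4 x i1> compare result
  int mask = _mm_movemask_ps(cmp);   // movmskps %xmm0, %eax
  return (mask & 0x0E) != 0;         // testb $14, %al ; setne %al
}

int main() {
  __m128 x = _mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f);
  __m128 y = _mm_setr_ps(0.0f, 9.0f, 2.0f, 8.0f);  // only lanes 0 and 2 are equal
  assert(any_of_lanes_1_to_3(x, y));               // lane 2 sets bit 2 of the mask
  return 0;
}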