[llvm] r327385 - [X86][SSE41] createVariablePermute v2X64 - PCMPEQQ can test for index 0/1 and select between them.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 13 05:22:59 PDT 2018
Author: rksimon
Date: Tue Mar 13 05:22:58 2018
New Revision: 327385
URL: http://llvm.org/viewvc/llvm-project?rev=327385&view=rev
Log:
[X86][SSE41] createVariablePermute v2X64 - PCMPEQQ can test for index 0/1 and select between them.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/var-permute-128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=327385&r1=327384&r2=327385&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Mar 13 05:22:58 2018
@@ -8039,6 +8039,14 @@ SDValue createVariablePermute(MVT VT, SD
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
Opcode = X86ISD::VPERMILPV;
ShuffleVT = MVT::v2f64;
+ } else if (Subtarget.hasSSE41()) {
+ // SSE41 can compare v2i64 - select between indices 0 and 1.
+ return DAG.getSelectCC(
+ DL, IndicesVec,
+ getZeroVector(IndicesVT.getSimpleVT(), Subtarget, DAG, DL),
+ DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}),
+ DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {1, 1}),
+ ISD::CondCode::SETEQ);
}
break;
case MVT::v32i8:
Modified: llvm/trunk/test/CodeGen/X86/var-permute-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/var-permute-128.ll?rev=327385&r1=327384&r2=327385&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/var-permute-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/var-permute-128.ll Tue Mar 13 05:22:58 2018
@@ -40,14 +40,13 @@ define <2 x i64> @var_shuffle_v2i64(<2 x
;
; SSE41-LABEL: var_shuffle_v2i64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movq %xmm1, %rax
-; SSE41-NEXT: andl $1, %eax
-; SSE41-NEXT: pextrq $1, %xmm1, %rcx
-; SSE41-NEXT: andl $1, %ecx
-; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE41-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pcmpeqq %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,1,0,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: var_shuffle_v2i64:
@@ -402,13 +401,13 @@ define <2 x double> @var_shuffle_v2f64(<
;
; SSE41-LABEL: var_shuffle_v2f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movq %xmm1, %rax
-; SSE41-NEXT: andl $1, %eax
-; SSE41-NEXT: pextrq $1, %xmm1, %rcx
-; SSE41-NEXT: andl $1, %ecx
-; SSE41-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm2[0,0]
+; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
+; SSE41-NEXT: movapd %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: var_shuffle_v2f64:
More information about the llvm-commits mailing list