[llvm] r302355 - [X86][SSE] Break register dependencies on v16i8/v8i16 BUILD_VECTOR on SSE41
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat May 6 10:30:40 PDT 2017
Author: rksimon
Date: Sat May 6 12:30:39 2017
New Revision: 302355
URL: http://llvm.org/viewvc/llvm-project?rev=302355&view=rev
Log:
[X86][SSE] Break register dependencies on v16i8/v8i16 BUILD_VECTOR on SSE41
rL294581 broke unnecessary register dependencies on partial v16i8/v8i16 BUILD_VECTORs, but on SSE41 we (currently) use insertion for full BUILD_VECTORs as well. By allowing full insertion to occur on SSE41 targets, we can break register dependencies here as well.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=302355&r1=302354&r2=302355&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 6 12:30:39 2017
@@ -6057,7 +6057,7 @@ static SDValue LowerBuildVectorv16i8(SDV
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- if (NumNonZero > 8)
+ if (NumNonZero > 8 && !Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
@@ -6145,7 +6145,7 @@ static SDValue LowerBuildVectorv8i16(SDV
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- if (NumNonZero > 4)
+ if (NumNonZero > 4 && !Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
Modified: llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll?rev=302355&r1=302354&r2=302355&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shuffle-vs-trunc-512.ll Sat May 6 12:30:39 2017
@@ -392,8 +392,10 @@ define void @shuffle_v32i16_to_v8i16(<32
; AVX512BW-LABEL: shuffle_v32i16_to_v8i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vmovss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX512BW-NEXT: vpextrw $4, %xmm0, %eax
-; AVX512BW-NEXT: vpinsrw $1, %eax, %xmm0, %xmm1
+; AVX512BW-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
; AVX512BW-NEXT: vmovd %xmm2, %eax
; AVX512BW-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
@@ -416,8 +418,10 @@ define void @shuffle_v32i16_to_v8i16(<32
; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vmovss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX512BWVL-NEXT: vpextrw $4, %xmm0, %eax
-; AVX512BWVL-NEXT: vpinsrw $1, %eax, %xmm0, %xmm1
+; AVX512BWVL-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
; AVX512BWVL-NEXT: vmovd %xmm2, %eax
; AVX512BWVL-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll?rev=302355&r1=302354&r2=302355&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll Sat May 6 12:30:39 2017
@@ -1303,70 +1303,39 @@ define <8 x i16> @var_shuffle_v8i16_v8i1
; SSE41-NEXT: andl $7, %r8d
; SSE41-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE41-NEXT: andl $7, %r9d
-; SSE41-NEXT: movzwl -40(%rsp,%rdi,2), %eax
-; SSE41-NEXT: movd %eax, %xmm1
-; SSE41-NEXT: pinsrw $1, -24(%rsp,%rsi,2), %xmm1
-; SSE41-NEXT: pinsrw $2, -40(%rsp,%rdx,2), %xmm1
-; SSE41-NEXT: pinsrw $3, -24(%rsp,%rcx,2), %xmm1
-; SSE41-NEXT: pinsrw $4, -40(%rsp,%r8,2), %xmm1
-; SSE41-NEXT: pinsrw $5, -24(%rsp,%r9,2), %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; SSE41-NEXT: pinsrw $0, -40(%rsp,%rdi,2), %xmm0
+; SSE41-NEXT: pinsrw $1, -24(%rsp,%rsi,2), %xmm0
+; SSE41-NEXT: pinsrw $2, -40(%rsp,%rdx,2), %xmm0
+; SSE41-NEXT: pinsrw $3, -24(%rsp,%rcx,2), %xmm0
+; SSE41-NEXT: pinsrw $4, -40(%rsp,%r8,2), %xmm0
+; SSE41-NEXT: pinsrw $5, -24(%rsp,%r9,2), %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
-; AVX1-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
-; AVX1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
-; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
-; AVX1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
-; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; AVX1-NEXT: andl $7, %edi
-; AVX1-NEXT: andl $7, %esi
-; AVX1-NEXT: andl $7, %edx
-; AVX1-NEXT: andl $7, %ecx
-; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andl $7, %r8d
-; AVX1-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
-; AVX1-NEXT: andl $7, %r9d
-; AVX1-NEXT: movzwl -40(%rsp,%rdi,2), %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
-; AVX1-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
-; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
-; AVX1-NEXT: vpinsrw $4, -40(%rsp,%r8,2), %xmm0, %xmm0
-; AVX1-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
-; AVX2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
-; AVX2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
-; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
-; AVX2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
-; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; AVX2-NEXT: andl $7, %edi
-; AVX2-NEXT: andl $7, %esi
-; AVX2-NEXT: andl $7, %edx
-; AVX2-NEXT: andl $7, %ecx
-; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andl $7, %r8d
-; AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: andl $7, %r9d
-; AVX2-NEXT: movzwl -40(%rsp,%rdi,2), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
-; AVX2-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
-; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
-; AVX2-NEXT: vpinsrw $4, -40(%rsp,%r8,2), %xmm0, %xmm0
-; AVX2-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
-; AVX2-NEXT: retq
+; AVX-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
+; AVX: # BB#0:
+; AVX-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
+; AVX-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
+; AVX-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
+; AVX-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
+; AVX-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
+; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; AVX-NEXT: andl $7, %edi
+; AVX-NEXT: andl $7, %esi
+; AVX-NEXT: andl $7, %edx
+; AVX-NEXT: andl $7, %ecx
+; AVX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: andl $7, %r8d
+; AVX-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
+; AVX-NEXT: andl $7, %r9d
+; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $0, -40(%rsp,%rdi,2), %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $4, -40(%rsp,%r8,2), %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
+; AVX-NEXT: retq
%x0 = extractelement <8 x i16> %x, i16 %i0
%y1 = extractelement <8 x i16> %y, i16 %i1
%x2 = extractelement <8 x i16> %x, i16 %i2
More information about the llvm-commits mailing list