[llvm] r314152 - [X86] Don't select anyext GR32->GR64 to SUBREG_TO_REG. Use INSERT_SUBREG instead.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 25 14:14:59 PDT 2017
Author: ctopper
Date: Mon Sep 25 14:14:59 2017
New Revision: 314152
URL: http://llvm.org/viewvc/llvm-project?rev=314152&view=rev
Log:
[X86] Don't select anyext GR32->GR64 to SUBREG_TO_REG. Use INSERT_SUBREG instead.
As far as I know, SUBREG_TO_REG asserts that the upper bits are 0. But if we are just converting the GR32 with no checks, we have no reason to claim the upper bits are 0.
I don't really know how to test this today, since I can't find anything that looks that closely at SUBREG_TO_REG. The test changes here seem to be just a perturbation of register allocation.
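To make the difference concrete, here is a rough sketch in MIR-like notation of what the two pattern expansions produce for (i64 (anyext GR32:$src)); the exact syntax and the %dst/%src/%tmp names are illustrative, not taken from the commit:

    ; before: SUBREG_TO_REG's first operand (0) asserts that the bits
    ; not covered by sub_32bit, i.e. bits 63:32, are known zero
    %dst:gr64 = SUBREG_TO_REG 0, %src:gr32, %subreg.sub_32bit

    ; after: the low 32 bits are inserted into an IMPLICIT_DEF base,
    ; so bits 63:32 stay undefined, matching anyext semantics
    %tmp:gr64 = IMPLICIT_DEF
    %dst:gr64 = INSERT_SUBREG %tmp, %src:gr32, %subreg.sub_32bit

Since anyext makes no promise about the high bits, INSERT_SUBREG over an IMPLICIT_DEF is the faithful expansion, whereas SUBREG_TO_REG would let later passes wrongly assume the high half is zero.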
Differential Revision: https://reviews.llvm.org/D38001
Modified:
llvm/trunk/lib/Target/X86/X86InstrCompiler.td
llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=314152&r1=314151&r2=314152&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Mon Sep 25 14:14:59 2017
@@ -1241,7 +1241,7 @@ def : Pat<(i64 (anyext GR8 :$src)),
def : Pat<(i64 (anyext GR16:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
def : Pat<(i64 (anyext GR32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>;
// Any instruction that defines a 32-bit result leaves the high half of the
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll?rev=314152&r1=314151&r2=314152&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-128.ll Mon Sep 25 14:14:59 2017
@@ -255,28 +255,28 @@ define <8 x i16> @var_shuffle_v8i16_v8i1
; SSE2-NEXT: andl $7, %r10d
; SSE2-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $7, %eax
-; SSE2-NEXT: movzwl -24(%rsp,%rax,2), %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzwl -24(%rsp,%r10,2), %eax
-; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movzwl -24(%rsp,%rdx,2), %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzwl -24(%rsp,%r8,2), %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzwl -24(%rsp,%rdx,2), %eax
+; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: movzwl -24(%rsp,%rdi,2), %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: movzwl -24(%rsp,%r8,2), %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSE2-NEXT: movzwl -24(%rsp,%r10,2), %eax
; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: movzwl -24(%rsp,%rdi,2), %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
@@ -299,28 +299,28 @@ define <8 x i16> @var_shuffle_v8i16_v8i1
; SSSE3-NEXT: andl $7, %r10d
; SSSE3-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $7, %eax
-; SSSE3-NEXT: movzwl -24(%rsp,%rax,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movzwl -24(%rsp,%rdx,2), %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzwl -24(%rsp,%rdx,2), %eax
+; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: movzwl -24(%rsp,%rdi,2), %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: movzwl -24(%rsp,%rax,2), %eax
; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax
; SSSE3-NEXT: movd %eax, %xmm3
-; SSSE3-NEXT: movzwl -24(%rsp,%rdi,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll?rev=314152&r1=314151&r2=314152&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-variable-256.ll Mon Sep 25 14:14:59 2017
@@ -295,52 +295,52 @@ define <16 x i16> @var_shuffle_v16i16_v1
; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; AVX1-NEXT: andl $15, %edi
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
+; AVX1-NEXT: movzwl (%rsp,%rdi,2), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: andl $15, %esi
+; AVX1-NEXT: vpinsrw $1, (%rsp,%rsi,2), %xmm0, %xmm0
+; AVX1-NEXT: andl $15, %edx
+; AVX1-NEXT: vpinsrw $2, (%rsp,%rdx,2), %xmm0, %xmm0
+; AVX1-NEXT: andl $15, %ecx
+; AVX1-NEXT: vpinsrw $3, (%rsp,%rcx,2), %xmm0, %xmm0
+; AVX1-NEXT: andl $15, %r8d
+; AVX1-NEXT: vpinsrw $4, (%rsp,%r8,2), %xmm0, %xmm0
+; AVX1-NEXT: andl $15, %r9d
+; AVX1-NEXT: vpinsrw $5, (%rsp,%r9,2), %xmm0, %xmm0
+; AVX1-NEXT: movl 16(%rbp), %eax
+; AVX1-NEXT: andl $15, %eax
+; AVX1-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: movl 24(%rbp), %eax
+; AVX1-NEXT: andl $15, %eax
+; AVX1-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl 32(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: movzwl (%rsp,%rax,2), %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: movl 40(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl 48(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl 56(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl 64(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl 72(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl 80(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
-; AVX1-NEXT: movl 88(%rbp), %eax
-; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
-; AVX1-NEXT: andl $15, %edi
-; AVX1-NEXT: movzwl (%rsp,%rdi,2), %eax
-; AVX1-NEXT: vmovd %eax, %xmm1
-; AVX1-NEXT: andl $15, %esi
-; AVX1-NEXT: vpinsrw $1, (%rsp,%rsi,2), %xmm1, %xmm1
-; AVX1-NEXT: andl $15, %edx
-; AVX1-NEXT: vpinsrw $2, (%rsp,%rdx,2), %xmm1, %xmm1
-; AVX1-NEXT: andl $15, %ecx
-; AVX1-NEXT: vpinsrw $3, (%rsp,%rcx,2), %xmm1, %xmm1
-; AVX1-NEXT: andl $15, %r8d
-; AVX1-NEXT: vpinsrw $4, (%rsp,%r8,2), %xmm1, %xmm1
-; AVX1-NEXT: andl $15, %r9d
-; AVX1-NEXT: vpinsrw $5, (%rsp,%r9,2), %xmm1, %xmm1
-; AVX1-NEXT: movl 16(%rbp), %eax
-; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
-; AVX1-NEXT: movl 24(%rbp), %eax
+; AVX1-NEXT: movl 88(%rbp), %eax
; AVX1-NEXT: andl $15, %eax
; AVX1-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
@@ -357,52 +357,52 @@ define <16 x i16> @var_shuffle_v16i16_v1
; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; AVX2-NEXT: andl $15, %edi
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
+; AVX2-NEXT: movzwl (%rsp,%rdi,2), %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: andl $15, %esi
+; AVX2-NEXT: vpinsrw $1, (%rsp,%rsi,2), %xmm0, %xmm0
+; AVX2-NEXT: andl $15, %edx
+; AVX2-NEXT: vpinsrw $2, (%rsp,%rdx,2), %xmm0, %xmm0
+; AVX2-NEXT: andl $15, %ecx
+; AVX2-NEXT: vpinsrw $3, (%rsp,%rcx,2), %xmm0, %xmm0
+; AVX2-NEXT: andl $15, %r8d
+; AVX2-NEXT: vpinsrw $4, (%rsp,%r8,2), %xmm0, %xmm0
+; AVX2-NEXT: andl $15, %r9d
+; AVX2-NEXT: vpinsrw $5, (%rsp,%r9,2), %xmm0, %xmm0
+; AVX2-NEXT: movl 16(%rbp), %eax
+; AVX2-NEXT: andl $15, %eax
+; AVX2-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: movl 24(%rbp), %eax
+; AVX2-NEXT: andl $15, %eax
+; AVX2-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl 32(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: movzwl (%rsp,%rax,2), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: movl 40(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl 48(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl 56(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl 64(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl 72(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl 80(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
-; AVX2-NEXT: movl 88(%rbp), %eax
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
-; AVX2-NEXT: andl $15, %edi
-; AVX2-NEXT: movzwl (%rsp,%rdi,2), %eax
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: andl $15, %esi
-; AVX2-NEXT: vpinsrw $1, (%rsp,%rsi,2), %xmm1, %xmm1
-; AVX2-NEXT: andl $15, %edx
-; AVX2-NEXT: vpinsrw $2, (%rsp,%rdx,2), %xmm1, %xmm1
-; AVX2-NEXT: andl $15, %ecx
-; AVX2-NEXT: vpinsrw $3, (%rsp,%rcx,2), %xmm1, %xmm1
-; AVX2-NEXT: andl $15, %r8d
-; AVX2-NEXT: vpinsrw $4, (%rsp,%r8,2), %xmm1, %xmm1
-; AVX2-NEXT: andl $15, %r9d
-; AVX2-NEXT: vpinsrw $5, (%rsp,%r9,2), %xmm1, %xmm1
-; AVX2-NEXT: movl 16(%rbp), %eax
-; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
-; AVX2-NEXT: movl 24(%rbp), %eax
+; AVX2-NEXT: movl 88(%rbp), %eax
; AVX2-NEXT: andl $15, %eax
; AVX2-NEXT: vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
@@ -450,52 +450,52 @@ define <16 x i16> @var_shuffle_v16i16_v8
; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; AVX1-NEXT: andl $7, %edi
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movzwl -24(%rsp,%rdi,2), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: andl $7, %esi
+; AVX1-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
+; AVX1-NEXT: andl $7, %edx
+; AVX1-NEXT: vpinsrw $2, -24(%rsp,%rdx,2), %xmm0, %xmm0
+; AVX1-NEXT: andl $7, %ecx
+; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
+; AVX1-NEXT: andl $7, %r8d
+; AVX1-NEXT: vpinsrw $4, -24(%rsp,%r8,2), %xmm0, %xmm0
+; AVX1-NEXT: andl $7, %r9d
+; AVX1-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
-; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
-; AVX1-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
-; AVX1-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: movzwl -24(%rsp,%rax,2), %eax
+; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
-; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
-; AVX1-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
-; AVX1-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
-; AVX1-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
-; AVX1-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
-; AVX1-NEXT: andl $7, %edi
-; AVX1-NEXT: movzwl -24(%rsp,%rdi,2), %eax
-; AVX1-NEXT: vmovd %eax, %xmm1
-; AVX1-NEXT: andl $7, %esi
-; AVX1-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm1, %xmm1
-; AVX1-NEXT: andl $7, %edx
-; AVX1-NEXT: vpinsrw $2, -24(%rsp,%rdx,2), %xmm1, %xmm1
-; AVX1-NEXT: andl $7, %ecx
-; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm1, %xmm1
-; AVX1-NEXT: andl $7, %r8d
-; AVX1-NEXT: vpinsrw $4, -24(%rsp,%r8,2), %xmm1, %xmm1
-; AVX1-NEXT: andl $7, %r9d
-; AVX1-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm1, %xmm1
+; AVX1-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: andl $7, %eax
; AVX1-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
@@ -506,52 +506,52 @@ define <16 x i16> @var_shuffle_v16i16_v8
; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; AVX2-NEXT: andl $7, %edi
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movzwl -24(%rsp,%rdi,2), %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: andl $7, %esi
+; AVX2-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
+; AVX2-NEXT: andl $7, %edx
+; AVX2-NEXT: vpinsrw $2, -24(%rsp,%rdx,2), %xmm0, %xmm0
+; AVX2-NEXT: andl $7, %ecx
+; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
+; AVX2-NEXT: andl $7, %r8d
+; AVX2-NEXT: vpinsrw $4, -24(%rsp,%r8,2), %xmm0, %xmm0
+; AVX2-NEXT: andl $7, %r9d
+; AVX2-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
-; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
-; AVX2-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
-; AVX2-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: movzwl -24(%rsp,%rax,2), %eax
+; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
-; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
-; AVX2-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
-; AVX2-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
-; AVX2-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
-; AVX2-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
-; AVX2-NEXT: andl $7, %edi
-; AVX2-NEXT: movzwl -24(%rsp,%rdi,2), %eax
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: andl $7, %esi
-; AVX2-NEXT: vpinsrw $1, -24(%rsp,%rsi,2), %xmm1, %xmm1
-; AVX2-NEXT: andl $7, %edx
-; AVX2-NEXT: vpinsrw $2, -24(%rsp,%rdx,2), %xmm1, %xmm1
-; AVX2-NEXT: andl $7, %ecx
-; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm1, %xmm1
-; AVX2-NEXT: andl $7, %r8d
-; AVX2-NEXT: vpinsrw $4, -24(%rsp,%r8,2), %xmm1, %xmm1
-; AVX2-NEXT: andl $7, %r9d
-; AVX2-NEXT: vpinsrw $5, -24(%rsp,%r9,2), %xmm1, %xmm1
+; AVX2-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: andl $7, %eax
; AVX2-NEXT: vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%x0 = extractelement <8 x i16> %x, i32 %i0
%x1 = extractelement <8 x i16> %x, i32 %i1