[llvm] r292876 - [SelectionDAG] Teach getNode to simplify a couple easy cases of EXTRACT_SUBVECTOR
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 23 18:37:00 PST 2017
Author: ctopper
Date: Mon Jan 23 20:36:59 2017
New Revision: 292876
URL: http://llvm.org/viewvc/llvm-project?rev=292876&view=rev
Log:
[SelectionDAG] Teach getNode to simplify a couple easy cases of EXTRACT_SUBVECTOR
Summary:
This teaches getNode to simplify extracting from undef, similar to what is already done for EXTRACT_VECTOR_ELT. It also adds support for extracting from a CONCAT_VECTORS when we can reuse one of the inputs to the concat directly. These seem like simple, non-target-specific optimizations.
For X86, we currently handle undef in extractSubvector, but not all EXTRACT_SUBVECTOR creations go through there.
Ultimately, my motivation here is to simplify extractSubvector and remove the custom lowering for EXTRACT_SUBVECTOR, since it does nothing beyond the undef and BUILD_VECTOR optimizations, and those should be DAG combines instead.
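As a rough standalone sketch of the concat fold's index arithmetic (plain C++, not LLVM's actual DAG classes; the struct and function names below are hypothetical stand-ins, and the sketch assumes the extract index is a multiple of the piece width):

  #include <cassert>
  #include <cstddef>
  #include <iostream>
  #include <vector>

  // Hypothetical stand-in for a CONCAT_VECTORS node whose inputs all have
  // the same type: PieceElts elements per input piece.
  struct Concat {
    std::size_t PieceElts;
    std::vector<int> Pieces; // ids standing in for the concat's operands
  };

  // An EXTRACT_SUBVECTOR of PieceElts elements starting at offset Idx can
  // reuse input number Idx / PieceElts. This sketch asserts that Idx is
  // piece-aligned; requiring the extract's type to equal the type of the
  // concat's first operand is what makes the pieces and the extract the
  // same width.
  int foldExtractOfConcat(const Concat &C, std::size_t Idx) {
    assert(Idx % C.PieceElts == 0 && "extract assumed piece-aligned");
    return C.Pieces[Idx / C.PieceElts];
  }

  int main() {
    Concat C{4, {10, 20}};                          // concat_vectors of two v4 pieces
    std::cout << foldExtractOfConcat(C, 0) << '\n'; // 10: reuses the first input
    std::cout << foldExtractOfConcat(C, 4) << '\n'; // 20: reuses the second input
  }

The undef fold is simpler still: any EXTRACT_SUBVECTOR whose source is undef is itself undef, so getNode can return getUNDEF(VT) immediately.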
Reviewers: RKSimon, delena
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D29000
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/test/CodeGen/X86/avx512-ext.ll
llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/vector-zext.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=292876&r1=292875&r2=292876&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Jan 23 20:36:59 2017
@@ -4084,6 +4084,19 @@ SDValue SelectionDAG::getNode(unsigned O
if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
+ // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
+
+ // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
+ // the concat have the same type as the extract.
+ if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0 &&
+ VT == N1.getOperand(0).getValueType()) {
+ unsigned Factor = VT.getVectorNumElements();
+ return N1.getOperand(N2C->getZExtValue() / Factor);
+ }
+
// EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
// during shuffle legalization.
if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=292876&r1=292875&r2=292876&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Mon Jan 23 20:36:59 2017
@@ -1502,301 +1502,22 @@ define void @extload_v8i64(<8 x i8>* %a,
define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-LABEL: test21:
; KNL: ## BB#0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: pushq %r15
-; KNL-NEXT: pushq %r14
-; KNL-NEXT: pushq %r13
-; KNL-NEXT: pushq %r12
-; KNL-NEXT: pushq %rbx
-; KNL-NEXT: vpmovsxbd %xmm7, %zmm7
-; KNL-NEXT: vpslld $31, %zmm7, %zmm7
-; KNL-NEXT: vpmovsxbd %xmm6, %zmm6
-; KNL-NEXT: vpslld $31, %zmm6, %zmm6
-; KNL-NEXT: vpmovsxbd %xmm5, %zmm5
-; KNL-NEXT: vpslld $31, %zmm5, %zmm5
-; KNL-NEXT: vpmovsxbd %xmm4, %zmm4
-; KNL-NEXT: vpslld $31, %zmm4, %zmm4
-; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r13d
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r11d
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r8d
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edi
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %esi
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebx
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebp
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r15d
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r12d
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r10d
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %r13d, %xmm4
-; KNL-NEXT: kmovw %k0, %r13d
-; KNL-NEXT: kshiftlw $14, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $15, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $2, %ecx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: kshiftlw $13, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r11d
-; KNL-NEXT: kshiftlw $12, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $4, %r8d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: kshiftlw $11, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $5, %edi, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %edi
-; KNL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; KNL-NEXT: kshiftlw $10, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: kshiftlw $9, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $7, %esi, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: kshiftlw $8, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $8, %ebx, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ebx
-; KNL-NEXT: kshiftlw $7, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $9, %ebp, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %ebp
-; KNL-NEXT: kshiftlw $6, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $10, %r14d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r14d
-; KNL-NEXT: kshiftlw $5, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $11, %r15d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r15d
-; KNL-NEXT: kshiftlw $4, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $12, %r12d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %edi
-; KNL-NEXT: kshiftlw $3, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $13, %r10d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: kshiftlw $2, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $14, %r9d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r9d
-; KNL-NEXT: kshiftlw $1, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $15, %r13d, %xmm4, %xmm4
-; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: vptestmd %zmm6, %zmm6, %k0
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %ecx, %xmm5
-; KNL-NEXT: kmovw %k1, %r13d
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $2, %r11d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $3, %r8d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $5, %edx, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r8d
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $6, %esi, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %edx
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $7, %ebx, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %esi
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $8, %ebp, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %ebp
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $9, %r14d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %ebx
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $10, %r15d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r11d
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $11, %edi, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %edi
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $12, %r10d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r10d
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $13, %r9d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $14, %r12d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $15, %r13d, %xmm5, %xmm5
-; KNL-NEXT: kmovw %k1, %r15d
-; KNL-NEXT: vptestmd %zmm7, %zmm7, %k1
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %eax, %xmm6
-; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: kshiftlw $14, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
-; KNL-NEXT: kmovw %k0, %r13d
-; KNL-NEXT: kshiftlw $15, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $13, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $3, %ecx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: kshiftlw $12, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $4, %r8d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: kshiftlw $11, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $5, %edx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: kshiftlw $10, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $6, %esi, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: kshiftlw $9, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $7, %ebp, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %ebp
-; KNL-NEXT: kshiftlw $8, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $8, %ebx, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %ebx
-; KNL-NEXT: kshiftlw $7, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $9, %r11d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r11d
-; KNL-NEXT: kshiftlw $6, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $10, %edi, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %edi
-; KNL-NEXT: kshiftlw $5, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $11, %r10d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: kshiftlw $4, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $12, %r9d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r9d
-; KNL-NEXT: kshiftlw $3, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $13, %r14d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r14d
-; KNL-NEXT: kshiftlw $2, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $14, %r15d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r15d
-; KNL-NEXT: kshiftlw $1, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $15, %r12d, %xmm6, %xmm6
-; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: kshiftrw $15, %k1, %k0
-; KNL-NEXT: vmovd %eax, %xmm7
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $1, %r13d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $2, %ecx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $3, %r8d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $4, %edx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $5, %esi, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $6, %ebp, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $7, %ebx, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $8, %r11d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $9, %edi, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $10, %r10d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $11, %r9d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $12, %r14d, %xmm7, %xmm7
-; KNL-NEXT: vpinsrb $13, %r15d, %xmm7, %xmm7
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm0, %ymm4, %ymm0
-; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
-; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
+; KNL-NEXT: vpsllw $15, %ymm5, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm1, %ymm4, %ymm1
-; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
-; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
+; KNL-NEXT: vpsllw $15, %ymm6, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2
-; KNL-NEXT: vpinsrb $14, %r12d, %xmm7, %xmm4
-; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
-; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
-; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
+; KNL-NEXT: vpsllw $15, %ymm7, %ymm4
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
; KNL-NEXT: vpand %ymm3, %ymm4, %ymm3
-; KNL-NEXT: popq %rbx
-; KNL-NEXT: popq %r12
-; KNL-NEXT: popq %r13
-; KNL-NEXT: popq %r14
-; KNL-NEXT: popq %r15
-; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test21:
Modified: llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll?rev=292876&r1=292875&r2=292876&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-pmovxrm.ll Mon Jan 23 20:36:59 2017
@@ -135,14 +135,12 @@ define <8 x i64> @test_llvm_x86_avx512_p
; X32-LABEL: test_llvm_x86_avx512_pmovzxbq:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovdqu (%eax), %xmm0
-; X32-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; X32-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: test_llvm_x86_avx512_pmovzxbq:
; X64: ## BB#0:
-; X64-NEXT: vmovdqu (%rdi), %xmm0
-; X64-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll?rev=292876&r1=292875&r2=292876&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll Mon Jan 23 20:36:59 2017
@@ -78,7 +78,6 @@ declare <8 x double> @llvm.x86.avx512.ma
define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
@@ -119,7 +118,6 @@ declare <8 x i64> @llvm.x86.avx512.mask.
define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=292876&r1=292875&r2=292876&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Mon Jan 23 20:36:59 2017
@@ -458,16 +458,10 @@ define <8 x i64> @zext_16i8_to_8i64(<16
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: zext_16i8_to_8i64:
-; AVX512F: # BB#0: # %entry
-; AVX512F-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: zext_16i8_to_8i64:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
-; AVX512BW-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: zext_16i8_to_8i64:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; AVX512-NEXT: retq
entry:
%B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%C = zext <8 x i8> %B to <8 x i64>