[llvm] r356858 - [X86][AVX] Start shuffle combining from ZERO_EXTEND_VECTOR_INREG (PR40685)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 24 09:30:35 PDT 2019
Author: rksimon
Date: Sun Mar 24 09:30:35 2019
New Revision: 356858
URL: http://llvm.org/viewvc/llvm-project?rev=356858&view=rev
Log:
[X86][AVX] Start shuffle combining from ZERO_EXTEND_VECTOR_INREG (PR40685)
Just enable this for AVX for now as SSE41 introduces extra register moves for the PMOVZX(PSHUFD(V)) -> UNPCKH(V,0) pattern (but otherwise helps reduce port5 usage on Intel targets).
Only AVX support is required for PR40685 as the issue is due to 8i8->8i32 zext shuffle leftovers.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avg.ll
llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
llvm/trunk/test/CodeGen/X86/psubus.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll
llvm/trunk/test/CodeGen/X86/vector-shift-shl-sub128.ll
llvm/trunk/test/CodeGen/X86/vector-zext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Mar 24 09:30:35 2019
@@ -1888,6 +1888,7 @@ X86TargetLowering::X86TargetLowering(con
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
+ setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -42597,7 +42598,8 @@ static SDValue combinePMULDQ(SDNode *N,
return SDValue();
}
-static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// Disabling for widening legalization for now. We can enable if we find a
// case that needs it. Otherwise it can be deleted when we switch to
// widening legalization.
@@ -42613,6 +42615,15 @@ static SDValue combineExtInVec(SDNode *N
TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType()))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
+ // Attempt to combine as a shuffle.
+ // TODO: SSE41 support
+ if (Subtarget.hasAVX() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ SDValue Op(N, 0);
+ if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
+ return Res;
+ }
+
return SDValue();
}
@@ -42679,7 +42690,8 @@ SDValue X86TargetLowering::PerformDAGCom
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
- case ISD::ANY_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG);
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
Modified: llvm/trunk/test/CodeGen/X86/avg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avg.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avg.ll Sun Mar 24 09:30:35 2019
@@ -2047,78 +2047,80 @@ define void @not_avg_v16i8_wide_constant
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm1[4],xmm7[4],xmm1[5],xmm7[5],xmm1[6],xmm7[6],xmm1[7],xmm7[7]
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm7[2],xmm2[3],xmm7[3]
-; AVX1-NEXT: vpextrq $1, %xmm5, %r15
-; AVX1-NEXT: vmovq %xmm5, %r12
+; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm7 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm2[2],xmm5[2],xmm2[3],xmm5[3]
+; AVX1-NEXT: vpextrq $1, %xmm6, %r15
+; AVX1-NEXT: vmovq %xmm6, %r12
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpextrq $1, %xmm2, %r11
; AVX1-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm7[2],xmm0[3],xmm7[3]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm5[2],xmm2[3],xmm5[3]
; AVX1-NEXT: vpextrq $1, %xmm2, %r13
; AVX1-NEXT: vmovq %xmm2, %r14
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm4[4],xmm7[4],xmm4[5],xmm7[5],xmm4[6],xmm7[6],xmm4[7],xmm7[7]
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm7[2],xmm1[3],xmm7[3]
-; AVX1-NEXT: vpextrq $1, %xmm5, %rbx
-; AVX1-NEXT: vmovq %xmm5, %rdx
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm8 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; AVX1-NEXT: vpextrq $1, %xmm6, %rbx
+; AVX1-NEXT: vmovq %xmm6, %rdx
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpextrq $1, %xmm1, %r9
; AVX1-NEXT: vmovq %xmm1, %r10
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm7[2],xmm1[3],xmm7[3]
-; AVX1-NEXT: vmovd %xmm6, %esi
-; AVX1-NEXT: vpextrd $1, %xmm6, %edi
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; AVX1-NEXT: vmovd %xmm7, %esi
+; AVX1-NEXT: vpextrd $1, %xmm7, %edi
+; AVX1-NEXT: vpextrd $2, %xmm7, %ecx
+; AVX1-NEXT: vpextrd $3, %xmm7, %ebp
+; AVX1-NEXT: vpextrd $3, %xmm6, %eax
+; AVX1-NEXT: leal -1(%rbp,%rax), %eax
+; AVX1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; AVX1-NEXT: vpextrd $2, %xmm6, %eax
-; AVX1-NEXT: vpextrd $3, %xmm6, %ebp
-; AVX1-NEXT: vpextrd $3, %xmm5, %ecx
-; AVX1-NEXT: leal -1(%rbp,%rcx), %ecx
-; AVX1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX1-NEXT: vpextrd $2, %xmm5, %ecx
-; AVX1-NEXT: leal -1(%rax,%rcx), %eax
+; AVX1-NEXT: leal -1(%rcx,%rax), %eax
; AVX1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX1-NEXT: vpextrd $1, %xmm5, %ecx
-; AVX1-NEXT: leal -1(%rdi,%rcx), %eax
+; AVX1-NEXT: vpextrd $1, %xmm6, %eax
+; AVX1-NEXT: leal -1(%rdi,%rax), %eax
; AVX1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; AVX1-NEXT: vmovd %xmm5, %ecx
-; AVX1-NEXT: leal -1(%rsi,%rcx), %r8d
-; AVX1-NEXT: vpextrq $1, %xmm4, %rcx
+; AVX1-NEXT: vmovd %xmm6, %eax
+; AVX1-NEXT: leal -1(%rsi,%rax), %r8d
+; AVX1-NEXT: vpextrq $1, %xmm5, %rax
; AVX1-NEXT: leal -1(%r15,%rbx), %r15d
-; AVX1-NEXT: vmovq %xmm4, %rsi
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; AVX1-NEXT: vmovq %xmm5, %rsi
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; AVX1-NEXT: leal -1(%r12,%rdx), %edx
-; AVX1-NEXT: vmovd %xmm2, %r12d
+; AVX1-NEXT: vmovd %xmm4, %r12d
; AVX1-NEXT: leal -1(%r11,%r9), %r11d
-; AVX1-NEXT: vpextrd $1, %xmm2, %edi
-; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; AVX1-NEXT: leal -1(%rax,%r10), %r10d
+; AVX1-NEXT: vpextrd $2, %xmm4, %edi
+; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX1-NEXT: leal -1(%rcx,%r10), %r10d
; AVX1-NEXT: vpextrd $2, %xmm2, %ebx
-; AVX1-NEXT: leal -1(%r13,%rcx), %r9d
-; AVX1-NEXT: vpextrd $3, %xmm2, %ecx
+; AVX1-NEXT: leal -1(%r13,%rax), %r9d
+; AVX1-NEXT: vpextrd $3, %xmm2, %eax
; AVX1-NEXT: leal -1(%r14,%rsi), %esi
-; AVX1-NEXT: vpextrd $3, %xmm3, %eax
-; AVX1-NEXT: leal -1(%rcx,%rax), %ecx
-; AVX1-NEXT: vpextrd $2, %xmm3, %eax
-; AVX1-NEXT: leal -1(%rbx,%rax), %ebx
-; AVX1-NEXT: vpextrd $1, %xmm3, %eax
-; AVX1-NEXT: leal -1(%rdi,%rax), %eax
+; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT: leal -1(%rax,%rcx), %eax
+; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT: leal -1(%rbx,%rcx), %ebx
+; AVX1-NEXT: vpextrd $2, %xmm3, %ecx
+; AVX1-NEXT: leal -1(%rdi,%rcx), %ecx
; AVX1-NEXT: vmovd %xmm3, %edi
; AVX1-NEXT: leal -1(%r12,%rdi), %edi
-; AVX1-NEXT: vpextrq $1, %xmm0, %r12
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpextrq $1, %xmm1, %r13
+; AVX1-NEXT: vpextrq $1, %xmm8, %r12
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX1-NEXT: vpextrq $1, %xmm0, %r13
; AVX1-NEXT: leal -1(%r12,%r13), %r12d
-; AVX1-NEXT: vmovq %xmm0, %r13
-; AVX1-NEXT: vmovq %xmm1, %r14
+; AVX1-NEXT: vmovq %xmm8, %r13
+; AVX1-NEXT: vmovq %xmm0, %r14
; AVX1-NEXT: leal -1(%r13,%r14), %ebp
; AVX1-NEXT: shrl %ebp
; AVX1-NEXT: vmovd %ebp, %xmm0
@@ -2138,12 +2140,12 @@ define void @not_avg_v16i8_wide_constant
; AVX1-NEXT: vpinsrb $7, %r15d, %xmm0, %xmm0
; AVX1-NEXT: shrl %edi
; AVX1-NEXT: vpinsrb $8, %edi, %xmm0, %xmm0
-; AVX1-NEXT: shrl %eax
-; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; AVX1-NEXT: shrl %ecx
+; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX1-NEXT: shrl %ebx
; AVX1-NEXT: vpinsrb $10, %ebx, %xmm0, %xmm0
-; AVX1-NEXT: shrl %ecx
-; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: shrl %eax
+; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX1-NEXT: shrl %r8d
; AVX1-NEXT: vpinsrb $12, %r8d, %xmm0, %xmm0
; AVX1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Sun Mar 24 09:30:35 2019
@@ -940,9 +940,10 @@ define <2 x i64> @test46(<2 x float> %x,
; AVX512-LABEL: test46:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
-; AVX512-NEXT: vpmovzxdq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x35,0xc0]
-; AVX512-NEXT: ## xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
+; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x14,0xc1]
+; AVX512-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI47_0-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
;
Modified: llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-bits-vector.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-bits-vector.ll Sun Mar 24 09:30:35 2019
@@ -76,15 +76,15 @@ define <4 x i32> @knownbits_mask_shuffle
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32: # %bb.0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64: # %bb.0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
%1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -96,15 +96,15 @@ define <4 x i32> @knownbits_mask_shuffle
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32: # %bb.0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64: # %bb.0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
%1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Sun Mar 24 09:30:35 2019
@@ -1950,16 +1950,10 @@ define <16 x i16> @psubus_16i32_max(<16
; AVX2-NEXT: vpminud %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT: vpackusdw %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpsubusw %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
-; AVX2-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsubusw %xmm2, %xmm3, %xmm2
+; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: psubus_16i32_max:
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll Sun Mar 24 09:30:35 2019
@@ -352,18 +352,16 @@ define <16 x i8> @test_divconstant_16i8(
; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm3
-; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
-; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpackuswb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
+; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm3
@@ -864,18 +862,16 @@ define <16 x i8> @test_remconstant_16i8(
; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm3
-; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
-; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpackuswb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
+; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
@@ -888,16 +884,14 @@ define <16 x i8> @test_remconstant_16i8(
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll Sun Mar 24 09:30:35 2019
@@ -253,18 +253,16 @@ define <32 x i8> @test_divconstant_32i8(
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm4
-; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT: vpsllw $7, %xmm6, %xmm6
-; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
-; AVX1-NEXT: vpackuswb %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4,5,6,7]
+; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
+; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
@@ -286,18 +284,16 @@ define <32 x i8> @test_divconstant_32i8(
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm4
-; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpsllw $7, %xmm6, %xmm6
-; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3,4,5,6],xmm6[7]
-; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
-; AVX1-NEXT: vpackuswb %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
+; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
+; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
@@ -688,18 +684,16 @@ define <32 x i8> @test_remconstant_32i8(
; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm4
-; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT: vpsllw $7, %xmm6, %xmm6
-; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0],xmm5[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
-; AVX1-NEXT: vpackuswb %xmm2, %xmm5, %xmm2
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4,5,6,7]
+; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
+; AVX1-NEXT: vpackuswb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm5, %xmm5
@@ -708,39 +702,35 @@ define <32 x i8> @test_remconstant_32i8(
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
-; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm4
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4
-; AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsubb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm5
-; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm6 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm7 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpsllw $7, %xmm7, %xmm7
-; AVX1-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0,1,2,3,4,5,6],xmm7[7]
-; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6
-; AVX1-NEXT: vpackuswb %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpsllw $7, %xmm6, %xmm6
+; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3,4,5,6],xmm6[7]
+; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm5, %xmm5
+; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5
+; AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm5[8],xmm1[8],xmm5[9],xmm1[9],xmm5[10],xmm1[10],xmm5[11],xmm1[11],xmm5[12],xmm1[12],xmm5[13],xmm1[13],xmm5[14],xmm1[14],xmm5[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm6, %xmm6
@@ -753,16 +743,14 @@ define <32 x i8> @test_remconstant_32i8(
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
-; AVX1-NEXT: vpackuswb %xmm1, %xmm4, %xmm4
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm4, %xmm4
-; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4
-; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll Sun Mar 24 09:30:35 2019
@@ -772,32 +772,29 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX1-LABEL: test_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm1
+; AVX1-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm1
+; AVX2-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: test_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX512BW-NEXT: vpmaxuq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpblendd {{.*#+}} xmm2 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512BW-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512BW-NEXT: vpmaxuq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll Sun Mar 24 09:30:35 2019
@@ -771,32 +771,29 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX1-LABEL: test_v2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1
+; AVX1-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm1
+; AVX2-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: test_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX512BW-NEXT: vpminuq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpblendd {{.*#+}} xmm2 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512BW-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512BW-NEXT: vpminuq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vmovd %xmm0, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-shift-shl-sub128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-shl-sub128.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-shl-sub128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-shl-sub128.ll Sun Mar 24 09:30:35 2019
@@ -307,18 +307,17 @@ define <8 x i8> @var_shift_v8i8(<8 x i8>
;
; AVX1-LABEL: var_shift_v8i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15]
-; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
-; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; AVX1-NEXT: vpslld $23, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2
-; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=356858&r1=356857&r2=356858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Sun Mar 24 09:30:35 2019
@@ -123,15 +123,14 @@ define <32 x i16> @zext_32i8_to_32i16(<3
;
; AVX1-LABEL: zext_32i8_to_32i16:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
@@ -585,15 +584,14 @@ define <16 x i32> @zext_16i16_to_16i32(<
;
; AVX1-LABEL: zext_16i16_to_16i32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
@@ -884,15 +882,14 @@ define <8 x i64> @zext_8i32_to_8i64(<8 x
;
; AVX1-LABEL: zext_8i32_to_8i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
@@ -1736,11 +1733,33 @@ define <2 x i64> @shuf_zext_16i8_to_2i64
; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
-; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: retq
+; AVX1-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; AVX2-SLOW: # %bb.0: # %entry
+; AVX2-SLOW-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; AVX2-FAST: # %bb.0: # %entry
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; AVX2-FAST-NEXT: retq
+;
+; AVX512F-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: retq
entry:
%B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%Z = bitcast <16 x i8> %B to <2 x i64>
@@ -1824,11 +1843,33 @@ define <2 x i64> @shuf_zext_8i16_to_2i64
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX-NEXT: retq
+; AVX1-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; AVX2-SLOW: # %bb.0: # %entry
+; AVX2-SLOW-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX2-SLOW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; AVX2-FAST: # %bb.0: # %entry
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
+; AVX2-FAST-NEXT: retq
+;
+; AVX512F-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: retq
entry:
%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
%Z = bitcast <8 x i16> %B to <2 x i64>
More information about the llvm-commits
mailing list