[llvm] 56ec6fc - [X86] Split off test coverage for Issue #62014
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 11 04:40:33 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-11T12:36:49+01:00
New Revision: 56ec6fcffe0e9fbab288130c195fbcf50eda6559
URL: https://github.com/llvm/llvm-project/commit/56ec6fcffe0e9fbab288130c195fbcf50eda6559
DIFF: https://github.com/llvm/llvm-project/commit/56ec6fcffe0e9fbab288130c195fbcf50eda6559.diff
LOG: [X86] Split off test coverage for Issue #62014
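vector-bo-select.ll should only be used for binop identity select tests, so the PR62014 tests are moved into their own file (pr62014.ll) and renamed from PR62014_* to select_cast_cond_multiuse_*.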
Added:
llvm/test/CodeGen/X86/pr62014.ll
Modified:
llvm/test/CodeGen/X86/vector-bo-select.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/pr62014.ll b/llvm/test/CodeGen/X86/pr62014.ll
new file mode 100644
index 000000000000..cc5cf921da83
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr62014.ll
@@ -0,0 +1,418 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,avx512vl | FileCheck %s --check-prefixes=AVX512VL
+
+define <8 x i16> @select_cast_cond_multiuse_v8i16(<8 x i16> %x, <8 x i16> %y, i8 %m, ptr %o) {
+; SSE2-LABEL: select_cast_cond_multiuse_v8i16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movl %edi, %eax
+; SSE2-NEXT: shrb $7, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: movl %edi, %eax
+; SSE2-NEXT: shrb $6, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: movl %edi, %eax
+; SSE2-NEXT: shrb $5, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: movl %edi, %eax
+; SSE2-NEXT: shrb $4, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: movl %edi, %eax
+; SSE2-NEXT: shrb $3, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: movl %edi, %eax
+; SSE2-NEXT: shrb $2, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: movd %edi, %xmm3
+; SSE2-NEXT: movl %edi, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm5
+; SSE2-NEXT: shrb %dil
+; SSE2-NEXT: movzbl %dil, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: negl %eax
+; SSE2-NEXT: movd %eax, %xmm6
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
+; SSE2-NEXT: pand %xmm3, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm2, (%rsi)
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: select_cast_cond_multiuse_v8i16:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: movl %edi, %eax
+; SSE42-NEXT: shrb %al
+; SSE42-NEXT: movzbl %al, %eax
+; SSE42-NEXT: movl %edi, %ecx
+; SSE42-NEXT: shrb $2, %cl
+; SSE42-NEXT: movzbl %cl, %ecx
+; SSE42-NEXT: movl %edi, %edx
+; SSE42-NEXT: shrb $3, %dl
+; SSE42-NEXT: movzbl %dl, %edx
+; SSE42-NEXT: movl %edi, %r8d
+; SSE42-NEXT: shrb $4, %r8b
+; SSE42-NEXT: movzbl %r8b, %r8d
+; SSE42-NEXT: movl %edi, %r9d
+; SSE42-NEXT: shrb $5, %r9b
+; SSE42-NEXT: movzbl %r9b, %r9d
+; SSE42-NEXT: movl %edi, %r10d
+; SSE42-NEXT: movl %edi, %r11d
+; SSE42-NEXT: movd %edi, %xmm3
+; SSE42-NEXT: andl $1, %edi
+; SSE42-NEXT: negl %edi
+; SSE42-NEXT: movd %edi, %xmm0
+; SSE42-NEXT: andl $1, %eax
+; SSE42-NEXT: negl %eax
+; SSE42-NEXT: pinsrw $1, %eax, %xmm0
+; SSE42-NEXT: andl $1, %ecx
+; SSE42-NEXT: negl %ecx
+; SSE42-NEXT: pinsrw $2, %ecx, %xmm0
+; SSE42-NEXT: andl $1, %edx
+; SSE42-NEXT: negl %edx
+; SSE42-NEXT: pinsrw $3, %edx, %xmm0
+; SSE42-NEXT: andl $1, %r8d
+; SSE42-NEXT: negl %r8d
+; SSE42-NEXT: pinsrw $4, %r8d, %xmm0
+; SSE42-NEXT: andl $1, %r9d
+; SSE42-NEXT: negl %r9d
+; SSE42-NEXT: pinsrw $5, %r9d, %xmm0
+; SSE42-NEXT: shrb $6, %r10b
+; SSE42-NEXT: movzbl %r10b, %eax
+; SSE42-NEXT: andl $1, %eax
+; SSE42-NEXT: negl %eax
+; SSE42-NEXT: pinsrw $6, %eax, %xmm0
+; SSE42-NEXT: shrb $7, %r11b
+; SSE42-NEXT: movzbl %r11b, %eax
+; SSE42-NEXT: negl %eax
+; SSE42-NEXT: pinsrw $7, %eax, %xmm0
+; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
+; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
+; SSE42-NEXT: pand %xmm4, %xmm3
+; SSE42-NEXT: pcmpeqw %xmm4, %xmm3
+; SSE42-NEXT: pblendvb %xmm0, %xmm2, %xmm1
+; SSE42-NEXT: movdqa %xmm3, (%rsi)
+; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX2-LABEL: select_cast_cond_multiuse_v8i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: shrb %al
+; AVX2-NEXT: movzbl %al, %eax
+; AVX2-NEXT: movl %edi, %ecx
+; AVX2-NEXT: shrb $2, %cl
+; AVX2-NEXT: movzbl %cl, %ecx
+; AVX2-NEXT: movl %edi, %edx
+; AVX2-NEXT: shrb $3, %dl
+; AVX2-NEXT: movzbl %dl, %edx
+; AVX2-NEXT: movl %edi, %r8d
+; AVX2-NEXT: shrb $4, %r8b
+; AVX2-NEXT: movzbl %r8b, %r8d
+; AVX2-NEXT: movl %edi, %r9d
+; AVX2-NEXT: shrb $5, %r9b
+; AVX2-NEXT: movzbl %r9b, %r9d
+; AVX2-NEXT: movl %edi, %r10d
+; AVX2-NEXT: movl %edi, %r11d
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: andl $1, %edi
+; AVX2-NEXT: negl %edi
+; AVX2-NEXT: vmovd %edi, %xmm3
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; AVX2-NEXT: andl $1, %ecx
+; AVX2-NEXT: negl %ecx
+; AVX2-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
+; AVX2-NEXT: andl $1, %edx
+; AVX2-NEXT: negl %edx
+; AVX2-NEXT: vpinsrw $3, %edx, %xmm3, %xmm3
+; AVX2-NEXT: andl $1, %r8d
+; AVX2-NEXT: negl %r8d
+; AVX2-NEXT: vpinsrw $4, %r8d, %xmm3, %xmm3
+; AVX2-NEXT: andl $1, %r9d
+; AVX2-NEXT: negl %r9d
+; AVX2-NEXT: vpinsrw $5, %r9d, %xmm3, %xmm3
+; AVX2-NEXT: shrb $6, %r10b
+; AVX2-NEXT: movzbl %r10b, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3
+; AVX2-NEXT: shrb $7, %r11b
+; AVX2-NEXT: movzbl %r11b, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vpblendvb %xmm3, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vmovdqa %xmm2, (%rsi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: select_cast_cond_multiuse_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: kmovw %edi, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
+; AVX512F-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX512F-NEXT: vmovdqa %xmm2, (%rsi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: select_cast_cond_multiuse_v8i16:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: kmovw %edi, %k1
+; AVX512VL-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512VL-NEXT: vpmovdw %ymm2, %xmm2
+; AVX512VL-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
+; AVX512VL-NEXT: vmovdqa %xmm2, (%rsi)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+ %z = bitcast i8 %m to <8 x i1>
+ %s = sext <8 x i1> %z to <8 x i16>
+ %v = select <8 x i1> %z, <8 x i16> %x, <8 x i16> %y
+ store <8 x i16> %s, ptr %o
+ ret <8 x i16> %v
+}
+
+define <8 x float> @select_cast_cond_multiuse_v8i16_v8f32(<8 x float> %x, <8 x float> %y, i8 %m, ptr %o) {
+; SSE2-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movl %edi, %eax
+; SSE2-NEXT: movl %edi, %ecx
+; SSE2-NEXT: movl %edi, %edx
+; SSE2-NEXT: movl %edi, %r8d
+; SSE2-NEXT: movl %edi, %r9d
+; SSE2-NEXT: movl %edi, %r10d
+; SSE2-NEXT: movl %edi, %r11d
+; SSE2-NEXT: movd %edi, %xmm4
+; SSE2-NEXT: # kill: def $dil killed $dil killed $edi
+; SSE2-NEXT: shrb %dil
+; SSE2-NEXT: andb $1, %dil
+; SSE2-NEXT: movzbl %dil, %edi
+; SSE2-NEXT: andb $1, %al
+; SSE2-NEXT: movzbl %al, %eax
+; SSE2-NEXT: shrb $2, %cl
+; SSE2-NEXT: andb $1, %cl
+; SSE2-NEXT: movzbl %cl, %ecx
+; SSE2-NEXT: shrb $3, %dl
+; SSE2-NEXT: andb $1, %dl
+; SSE2-NEXT: movzbl %dl, %edx
+; SSE2-NEXT: shrb $4, %r8b
+; SSE2-NEXT: andb $1, %r8b
+; SSE2-NEXT: movzbl %r8b, %r8d
+; SSE2-NEXT: shrb $5, %r9b
+; SSE2-NEXT: andb $1, %r9b
+; SSE2-NEXT: movzbl %r9b, %r9d
+; SSE2-NEXT: movd %eax, %xmm5
+; SSE2-NEXT: pinsrw $1, %edi, %xmm5
+; SSE2-NEXT: pinsrw $2, %ecx, %xmm5
+; SSE2-NEXT: pinsrw $3, %edx, %xmm5
+; SSE2-NEXT: pinsrw $4, %r8d, %xmm5
+; SSE2-NEXT: pinsrw $5, %r9d, %xmm5
+; SSE2-NEXT: shrb $6, %r10b
+; SSE2-NEXT: andb $1, %r10b
+; SSE2-NEXT: movzbl %r10b, %eax
+; SSE2-NEXT: pinsrw $6, %eax, %xmm5
+; SSE2-NEXT: shrb $7, %r11b
+; SSE2-NEXT: movzbl %r11b, %eax
+; SSE2-NEXT: pinsrw $7, %eax, %xmm5
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
+; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [1,2,4,8,16,32,64,128]
+; SSE2-NEXT: pand %xmm6, %xmm4
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pslld $31, %xmm6
+; SSE2-NEXT: psrad $31, %xmm6
+; SSE2-NEXT: pand %xmm6, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pslld $31, %xmm5
+; SSE2-NEXT: psrad $31, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm1
+; SSE2-NEXT: movdqa %xmm4, (%rsi)
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movaps %xmm0, %xmm4
+; SSE42-NEXT: movl %edi, %eax
+; SSE42-NEXT: movl %edi, %ecx
+; SSE42-NEXT: movl %edi, %edx
+; SSE42-NEXT: movl %edi, %r8d
+; SSE42-NEXT: movl %edi, %r9d
+; SSE42-NEXT: movl %edi, %r10d
+; SSE42-NEXT: movl %edi, %r11d
+; SSE42-NEXT: movd %edi, %xmm6
+; SSE42-NEXT: # kill: def $dil killed $dil killed $edi
+; SSE42-NEXT: shrb %dil
+; SSE42-NEXT: andb $1, %dil
+; SSE42-NEXT: movzbl %dil, %edi
+; SSE42-NEXT: andb $1, %al
+; SSE42-NEXT: movzbl %al, %eax
+; SSE42-NEXT: shrb $2, %cl
+; SSE42-NEXT: andb $1, %cl
+; SSE42-NEXT: movzbl %cl, %ecx
+; SSE42-NEXT: shrb $3, %dl
+; SSE42-NEXT: andb $1, %dl
+; SSE42-NEXT: movzbl %dl, %edx
+; SSE42-NEXT: shrb $4, %r8b
+; SSE42-NEXT: andb $1, %r8b
+; SSE42-NEXT: movzbl %r8b, %r8d
+; SSE42-NEXT: shrb $5, %r9b
+; SSE42-NEXT: andb $1, %r9b
+; SSE42-NEXT: movzbl %r9b, %r9d
+; SSE42-NEXT: movd %eax, %xmm5
+; SSE42-NEXT: pinsrb $2, %edi, %xmm5
+; SSE42-NEXT: pinsrb $4, %ecx, %xmm5
+; SSE42-NEXT: pinsrb $6, %edx, %xmm5
+; SSE42-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero
+; SSE42-NEXT: pinsrb $8, %r8d, %xmm5
+; SSE42-NEXT: pinsrb $10, %r9d, %xmm5
+; SSE42-NEXT: shrb $6, %r10b
+; SSE42-NEXT: andb $1, %r10b
+; SSE42-NEXT: movzbl %r10b, %eax
+; SSE42-NEXT: pinsrb $12, %eax, %xmm5
+; SSE42-NEXT: shrb $7, %r11b
+; SSE42-NEXT: movzbl %r11b, %eax
+; SSE42-NEXT: pinsrb $14, %eax, %xmm5
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[0,0,0,0,4,5,6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0]
+; SSE42-NEXT: movdqa {{.*#+}} xmm7 = [1,2,4,8,16,32,64,128]
+; SSE42-NEXT: pand %xmm7, %xmm6
+; SSE42-NEXT: pcmpeqw %xmm7, %xmm6
+; SSE42-NEXT: pslld $31, %xmm0
+; SSE42-NEXT: blendvps %xmm0, %xmm4, %xmm2
+; SSE42-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
+; SSE42-NEXT: pslld $31, %xmm5
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvps %xmm0, %xmm1, %xmm3
+; SSE42-NEXT: movdqa %xmm6, (%rsi)
+; SSE42-NEXT: movaps %xmm2, %xmm0
+; SSE42-NEXT: movaps %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX2-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: movl %edi, %ecx
+; AVX2-NEXT: movl %edi, %r8d
+; AVX2-NEXT: movl %edi, %edx
+; AVX2-NEXT: movl %edi, %r10d
+; AVX2-NEXT: movl %edi, %r9d
+; AVX2-NEXT: movl %edi, %r11d
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: # kill: def $dil killed $dil killed $edi
+; AVX2-NEXT: shrb $5, %dil
+; AVX2-NEXT: movzbl %dil, %edi
+; AVX2-NEXT: shrb $4, %al
+; AVX2-NEXT: movzbl %al, %eax
+; AVX2-NEXT: shrb $6, %cl
+; AVX2-NEXT: movzbl %cl, %ecx
+; AVX2-NEXT: shrb $7, %r8b
+; AVX2-NEXT: movzbl %r8b, %r8d
+; AVX2-NEXT: shrb %r10b
+; AVX2-NEXT: movzbl %r10b, %r10d
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vmovd %eax, %xmm3
+; AVX2-NEXT: andl $1, %edi
+; AVX2-NEXT: negl %edi
+; AVX2-NEXT: vpinsrd $1, %edi, %xmm3, %xmm3
+; AVX2-NEXT: andl $1, %ecx
+; AVX2-NEXT: negl %ecx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
+; AVX2-NEXT: negl %r8d
+; AVX2-NEXT: vpinsrd $3, %r8d, %xmm3, %xmm3
+; AVX2-NEXT: andl $1, %edx
+; AVX2-NEXT: negl %edx
+; AVX2-NEXT: vmovd %edx, %xmm4
+; AVX2-NEXT: andl $1, %r10d
+; AVX2-NEXT: negl %r10d
+; AVX2-NEXT: vpinsrd $1, %r10d, %xmm4, %xmm4
+; AVX2-NEXT: shrb $2, %r9b
+; AVX2-NEXT: movzbl %r9b, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpinsrd $2, %eax, %xmm4, %xmm4
+; AVX2-NEXT: shrb $3, %r11b
+; AVX2-NEXT: movzbl %r11b, %eax
+; AVX2-NEXT: andl $1, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpinsrd $3, %eax, %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
+; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vblendvps %ymm3, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vmovdqa %xmm2, (%rsi)
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: kmovw %edi, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
+; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: vmovdqa %xmm2, (%rsi)
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: select_cast_cond_multiuse_v8i16_v8f32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: kmovw %edi, %k1
+; AVX512VL-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
+; AVX512VL-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
+; AVX512VL-NEXT: vpmovdw %ymm2, (%rsi)
+; AVX512VL-NEXT: retq
+ %z = bitcast i8 %m to <8 x i1>
+ %s = sext <8 x i1> %z to <8 x i16>
+ %v = select <8 x i1> %z, <8 x float> %x, <8 x float> %y
+ store <8 x i16> %s, ptr %o
+ ret <8 x float> %v
+}
diff --git a/llvm/test/CodeGen/X86/vector-bo-select.ll b/llvm/test/CodeGen/X86/vector-bo-select.ll
index f16efdff5d18..e1d10688a422 100644
--- a/llvm/test/CodeGen/X86/vector-bo-select.ll
+++ b/llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -5868,415 +5868,3 @@ define <8 x i64> @select_sdiv_neutral_constant_v8i64(<8 x i1> %b, <8 x i64> %x,
%r = sdiv <8 x i64> %x, %sel
ret <8 x i64> %r
}
-
-define <8 x i16> @PR62014_v8i16(<8 x i16> %x, <8 x i16> %y, i8 %m, ptr %o) {
-; SSE2-LABEL: PR62014_v8i16:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movl %edi, %eax
-; SSE2-NEXT: shrb $7, %al
-; SSE2-NEXT: movzbl %al, %eax
-; SSE2-NEXT: negl %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: movl %edi, %eax
-; SSE2-NEXT: shrb $6, %al
-; SSE2-NEXT: movzbl %al, %eax
-; SSE2-NEXT: andl $1, %eax
-; SSE2-NEXT: negl %eax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT: movl %edi, %eax
-; SSE2-NEXT: shrb $5, %al
-; SSE2-NEXT: movzbl %al, %eax
-; SSE2-NEXT: andl $1, %eax
-; SSE2-NEXT: negl %eax
-; SSE2-NEXT: movd %eax, %xmm4
-; SSE2-NEXT: movl %edi, %eax
-; SSE2-NEXT: shrb $4, %al
-; SSE2-NEXT: movzbl %al, %eax
-; SSE2-NEXT: andl $1, %eax
-; SSE2-NEXT: negl %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: movl %edi, %eax
-; SSE2-NEXT: shrb $3, %al
-; SSE2-NEXT: movzbl %al, %eax
-; SSE2-NEXT: andl $1, %eax
-; SSE2-NEXT: negl %eax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: movl %edi, %eax
-; SSE2-NEXT: shrb $2, %al
-; SSE2-NEXT: movzbl %al, %eax
-; SSE2-NEXT: andl $1, %eax
-; SSE2-NEXT: negl %eax
-; SSE2-NEXT: movd %eax, %xmm4
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE2-NEXT: movd %edi, %xmm3
-; SSE2-NEXT: movl %edi, %eax
-; SSE2-NEXT: andl $1, %eax
-; SSE2-NEXT: negl %eax
-; SSE2-NEXT: movd %eax, %xmm5
-; SSE2-NEXT: shrb %dil
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: andl $1, %eax
-; SSE2-NEXT: negl %eax
-; SSE2-NEXT: movd %eax, %xmm6
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
-; SSE2-NEXT: pand %xmm3, %xmm2
-; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
-; SSE2-NEXT: pand %xmm5, %xmm0
-; SSE2-NEXT: pandn %xmm1, %xmm5
-; SSE2-NEXT: por %xmm5, %xmm0
-; SSE2-NEXT: movdqa %xmm2, (%rsi)
-; SSE2-NEXT: retq
-;
-; SSE42-LABEL: PR62014_v8i16:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm0, %xmm2
-; SSE42-NEXT: movl %edi, %eax
-; SSE42-NEXT: shrb %al
-; SSE42-NEXT: movzbl %al, %eax
-; SSE42-NEXT: movl %edi, %ecx
-; SSE42-NEXT: shrb $2, %cl
-; SSE42-NEXT: movzbl %cl, %ecx
-; SSE42-NEXT: movl %edi, %edx
-; SSE42-NEXT: shrb $3, %dl
-; SSE42-NEXT: movzbl %dl, %edx
-; SSE42-NEXT: movl %edi, %r8d
-; SSE42-NEXT: shrb $4, %r8b
-; SSE42-NEXT: movzbl %r8b, %r8d
-; SSE42-NEXT: movl %edi, %r9d
-; SSE42-NEXT: shrb $5, %r9b
-; SSE42-NEXT: movzbl %r9b, %r9d
-; SSE42-NEXT: movl %edi, %r10d
-; SSE42-NEXT: movl %edi, %r11d
-; SSE42-NEXT: movd %edi, %xmm3
-; SSE42-NEXT: andl $1, %edi
-; SSE42-NEXT: negl %edi
-; SSE42-NEXT: movd %edi, %xmm0
-; SSE42-NEXT: andl $1, %eax
-; SSE42-NEXT: negl %eax
-; SSE42-NEXT: pinsrw $1, %eax, %xmm0
-; SSE42-NEXT: andl $1, %ecx
-; SSE42-NEXT: negl %ecx
-; SSE42-NEXT: pinsrw $2, %ecx, %xmm0
-; SSE42-NEXT: andl $1, %edx
-; SSE42-NEXT: negl %edx
-; SSE42-NEXT: pinsrw $3, %edx, %xmm0
-; SSE42-NEXT: andl $1, %r8d
-; SSE42-NEXT: negl %r8d
-; SSE42-NEXT: pinsrw $4, %r8d, %xmm0
-; SSE42-NEXT: andl $1, %r9d
-; SSE42-NEXT: negl %r9d
-; SSE42-NEXT: pinsrw $5, %r9d, %xmm0
-; SSE42-NEXT: shrb $6, %r10b
-; SSE42-NEXT: movzbl %r10b, %eax
-; SSE42-NEXT: andl $1, %eax
-; SSE42-NEXT: negl %eax
-; SSE42-NEXT: pinsrw $6, %eax, %xmm0
-; SSE42-NEXT: shrb $7, %r11b
-; SSE42-NEXT: movzbl %r11b, %eax
-; SSE42-NEXT: negl %eax
-; SSE42-NEXT: pinsrw $7, %eax, %xmm0
-; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
-; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
-; SSE42-NEXT: pand %xmm4, %xmm3
-; SSE42-NEXT: pcmpeqw %xmm4, %xmm3
-; SSE42-NEXT: pblendvb %xmm0, %xmm2, %xmm1
-; SSE42-NEXT: movdqa %xmm3, (%rsi)
-; SSE42-NEXT: movdqa %xmm1, %xmm0
-; SSE42-NEXT: retq
-;
-; AVX2-LABEL: PR62014_v8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $2, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: movl %edi, %edx
-; AVX2-NEXT: shrb $3, %dl
-; AVX2-NEXT: movzbl %dl, %edx
-; AVX2-NEXT: movl %edi, %r8d
-; AVX2-NEXT: shrb $4, %r8b
-; AVX2-NEXT: movzbl %r8b, %r8d
-; AVX2-NEXT: movl %edi, %r9d
-; AVX2-NEXT: shrb $5, %r9b
-; AVX2-NEXT: movzbl %r9b, %r9d
-; AVX2-NEXT: movl %edi, %r10d
-; AVX2-NEXT: movl %edi, %r11d
-; AVX2-NEXT: vmovd %edi, %xmm2
-; AVX2-NEXT: andl $1, %edi
-; AVX2-NEXT: negl %edi
-; AVX2-NEXT: vmovd %edi, %xmm3
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vpinsrw $2, %ecx, %xmm3, %xmm3
-; AVX2-NEXT: andl $1, %edx
-; AVX2-NEXT: negl %edx
-; AVX2-NEXT: vpinsrw $3, %edx, %xmm3, %xmm3
-; AVX2-NEXT: andl $1, %r8d
-; AVX2-NEXT: negl %r8d
-; AVX2-NEXT: vpinsrw $4, %r8d, %xmm3, %xmm3
-; AVX2-NEXT: andl $1, %r9d
-; AVX2-NEXT: negl %r9d
-; AVX2-NEXT: vpinsrw $5, %r9d, %xmm3, %xmm3
-; AVX2-NEXT: shrb $6, %r10b
-; AVX2-NEXT: movzbl %r10b, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $7, %r11b
-; AVX2-NEXT: movzbl %r11b, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
-; AVX2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
-; AVX2-NEXT: vpblendvb %xmm3, %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vmovdqa %xmm2, (%rsi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: PR62014_v8i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
-; AVX512F-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
-; AVX512F-NEXT: vmovdqa %xmm2, (%rsi)
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: PR62014_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: kmovw %edi, %k1
-; AVX512VL-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
-; AVX512VL-NEXT: vpmovdw %ymm2, %xmm2
-; AVX512VL-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
-; AVX512VL-NEXT: vmovdqa %xmm2, (%rsi)
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
- %z = bitcast i8 %m to <8 x i1>
- %s = sext <8 x i1> %z to <8 x i16>
- %v = select <8 x i1> %z, <8 x i16> %x, <8 x i16> %y
- store <8 x i16> %s, ptr %o
- ret <8 x i16> %v
-}
-
-define <8 x float> @PR62014_v8f32(<8 x float> %x, <8 x float> %y, i8 %m, ptr %o) {
-; SSE2-LABEL: PR62014_v8f32:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movl %edi, %eax
-; SSE2-NEXT: movl %edi, %ecx
-; SSE2-NEXT: movl %edi, %edx
-; SSE2-NEXT: movl %edi, %r8d
-; SSE2-NEXT: movl %edi, %r9d
-; SSE2-NEXT: movl %edi, %r10d
-; SSE2-NEXT: movl %edi, %r11d
-; SSE2-NEXT: movd %edi, %xmm4
-; SSE2-NEXT: # kill: def $dil killed $dil killed $edi
-; SSE2-NEXT: shrb %dil
-; SSE2-NEXT: andb $1, %dil
-; SSE2-NEXT: movzbl %dil, %edi
-; SSE2-NEXT: andb $1, %al
-; SSE2-NEXT: movzbl %al, %eax
-; SSE2-NEXT: shrb $2, %cl
-; SSE2-NEXT: andb $1, %cl
-; SSE2-NEXT: movzbl %cl, %ecx
-; SSE2-NEXT: shrb $3, %dl
-; SSE2-NEXT: andb $1, %dl
-; SSE2-NEXT: movzbl %dl, %edx
-; SSE2-NEXT: shrb $4, %r8b
-; SSE2-NEXT: andb $1, %r8b
-; SSE2-NEXT: movzbl %r8b, %r8d
-; SSE2-NEXT: shrb $5, %r9b
-; SSE2-NEXT: andb $1, %r9b
-; SSE2-NEXT: movzbl %r9b, %r9d
-; SSE2-NEXT: movd %eax, %xmm5
-; SSE2-NEXT: pinsrw $1, %edi, %xmm5
-; SSE2-NEXT: pinsrw $2, %ecx, %xmm5
-; SSE2-NEXT: pinsrw $3, %edx, %xmm5
-; SSE2-NEXT: pinsrw $4, %r8d, %xmm5
-; SSE2-NEXT: pinsrw $5, %r9d, %xmm5
-; SSE2-NEXT: shrb $6, %r10b
-; SSE2-NEXT: andb $1, %r10b
-; SSE2-NEXT: movzbl %r10b, %eax
-; SSE2-NEXT: pinsrw $6, %eax, %xmm5
-; SSE2-NEXT: shrb $7, %r11b
-; SSE2-NEXT: movzbl %r11b, %eax
-; SSE2-NEXT: pinsrw $7, %eax, %xmm5
-; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
-; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [1,2,4,8,16,32,64,128]
-; SSE2-NEXT: pand %xmm6, %xmm4
-; SSE2-NEXT: pcmpeqw %xmm6, %xmm4
-; SSE2-NEXT: movdqa %xmm5, %xmm6
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: pslld $31, %xmm6
-; SSE2-NEXT: psrad $31, %xmm6
-; SSE2-NEXT: pand %xmm6, %xmm0
-; SSE2-NEXT: pandn %xmm2, %xmm6
-; SSE2-NEXT: por %xmm6, %xmm0
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pslld $31, %xmm5
-; SSE2-NEXT: psrad $31, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm1
-; SSE2-NEXT: pandn %xmm3, %xmm5
-; SSE2-NEXT: por %xmm5, %xmm1
-; SSE2-NEXT: movdqa %xmm4, (%rsi)
-; SSE2-NEXT: retq
-;
-; SSE42-LABEL: PR62014_v8f32:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movaps %xmm0, %xmm4
-; SSE42-NEXT: movl %edi, %eax
-; SSE42-NEXT: movl %edi, %ecx
-; SSE42-NEXT: movl %edi, %edx
-; SSE42-NEXT: movl %edi, %r8d
-; SSE42-NEXT: movl %edi, %r9d
-; SSE42-NEXT: movl %edi, %r10d
-; SSE42-NEXT: movl %edi, %r11d
-; SSE42-NEXT: movd %edi, %xmm6
-; SSE42-NEXT: # kill: def $dil killed $dil killed $edi
-; SSE42-NEXT: shrb %dil
-; SSE42-NEXT: andb $1, %dil
-; SSE42-NEXT: movzbl %dil, %edi
-; SSE42-NEXT: andb $1, %al
-; SSE42-NEXT: movzbl %al, %eax
-; SSE42-NEXT: shrb $2, %cl
-; SSE42-NEXT: andb $1, %cl
-; SSE42-NEXT: movzbl %cl, %ecx
-; SSE42-NEXT: shrb $3, %dl
-; SSE42-NEXT: andb $1, %dl
-; SSE42-NEXT: movzbl %dl, %edx
-; SSE42-NEXT: shrb $4, %r8b
-; SSE42-NEXT: andb $1, %r8b
-; SSE42-NEXT: movzbl %r8b, %r8d
-; SSE42-NEXT: shrb $5, %r9b
-; SSE42-NEXT: andb $1, %r9b
-; SSE42-NEXT: movzbl %r9b, %r9d
-; SSE42-NEXT: movd %eax, %xmm5
-; SSE42-NEXT: pinsrb $2, %edi, %xmm5
-; SSE42-NEXT: pinsrb $4, %ecx, %xmm5
-; SSE42-NEXT: pinsrb $6, %edx, %xmm5
-; SSE42-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero
-; SSE42-NEXT: pinsrb $8, %r8d, %xmm5
-; SSE42-NEXT: pinsrb $10, %r9d, %xmm5
-; SSE42-NEXT: shrb $6, %r10b
-; SSE42-NEXT: andb $1, %r10b
-; SSE42-NEXT: movzbl %r10b, %eax
-; SSE42-NEXT: pinsrb $12, %eax, %xmm5
-; SSE42-NEXT: shrb $7, %r11b
-; SSE42-NEXT: movzbl %r11b, %eax
-; SSE42-NEXT: pinsrb $14, %eax, %xmm5
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[0,0,0,0,4,5,6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0]
-; SSE42-NEXT: movdqa {{.*#+}} xmm7 = [1,2,4,8,16,32,64,128]
-; SSE42-NEXT: pand %xmm7, %xmm6
-; SSE42-NEXT: pcmpeqw %xmm7, %xmm6
-; SSE42-NEXT: pslld $31, %xmm0
-; SSE42-NEXT: blendvps %xmm0, %xmm4, %xmm2
-; SSE42-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; SSE42-NEXT: pslld $31, %xmm5
-; SSE42-NEXT: movdqa %xmm5, %xmm0
-; SSE42-NEXT: blendvps %xmm0, %xmm1, %xmm3
-; SSE42-NEXT: movdqa %xmm6, (%rsi)
-; SSE42-NEXT: movaps %xmm2, %xmm0
-; SSE42-NEXT: movaps %xmm3, %xmm1
-; SSE42-NEXT: retq
-;
-; AVX2-LABEL: PR62014_v8f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: movl %edi, %r8d
-; AVX2-NEXT: movl %edi, %edx
-; AVX2-NEXT: movl %edi, %r10d
-; AVX2-NEXT: movl %edi, %r9d
-; AVX2-NEXT: movl %edi, %r11d
-; AVX2-NEXT: vmovd %edi, %xmm2
-; AVX2-NEXT: # kill: def $dil killed $dil killed $edi
-; AVX2-NEXT: shrb $5, %dil
-; AVX2-NEXT: movzbl %dil, %edi
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: shrb $6, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: shrb $7, %r8b
-; AVX2-NEXT: movzbl %r8b, %r8d
-; AVX2-NEXT: shrb %r10b
-; AVX2-NEXT: movzbl %r10b, %r10d
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: andl $1, %edi
-; AVX2-NEXT: negl %edi
-; AVX2-NEXT: vpinsrd $1, %edi, %xmm3, %xmm3
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vpinsrd $2, %ecx, %xmm3, %xmm3
-; AVX2-NEXT: negl %r8d
-; AVX2-NEXT: vpinsrd $3, %r8d, %xmm3, %xmm3
-; AVX2-NEXT: andl $1, %edx
-; AVX2-NEXT: negl %edx
-; AVX2-NEXT: vmovd %edx, %xmm4
-; AVX2-NEXT: andl $1, %r10d
-; AVX2-NEXT: negl %r10d
-; AVX2-NEXT: vpinsrd $1, %r10d, %xmm4, %xmm4
-; AVX2-NEXT: shrb $2, %r9b
-; AVX2-NEXT: movzbl %r9b, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: shrb $3, %r11b
-; AVX2-NEXT: movzbl %r11b, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm4, %xmm4
-; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
-; AVX2-NEXT: vpbroadcastb %xmm2, %xmm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
-; AVX2-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
-; AVX2-NEXT: vblendvps %ymm3, %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vmovdqa %xmm2, (%rsi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: PR62014_v8f32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
-; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
-; AVX512F-NEXT: vmovdqa %xmm2, (%rsi)
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: PR62014_v8f32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: kmovw %edi, %k1
-; AVX512VL-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
-; AVX512VL-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
-; AVX512VL-NEXT: vpmovdw %ymm2, (%rsi)
-; AVX512VL-NEXT: retq
- %z = bitcast i8 %m to <8 x i1>
- %s = sext <8 x i1> %z to <8 x i16>
- %v = select <8 x i1> %z, <8 x float> %x, <8 x float> %y
- store <8 x i16> %s, ptr %o
- ret <8 x float> %v
-}
More information about the llvm-commits mailing list