[llvm] f591231 - [X86] combineSelect - canonicalize (vXi1 bitcast(iX Cond)) with combineToExtendBoolVectorInReg before legalization
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 15 05:16:36 PDT 2022
Author: Simon Pilgrim
Date: 2022-03-15T12:16:11Z
New Revision: f591231cadb8c539cc24afa522160b704b6dfe1d
URL: https://github.com/llvm/llvm-project/commit/f591231cadb8c539cc24afa522160b704b6dfe1d
DIFF: https://github.com/llvm/llvm-project/commit/f591231cadb8c539cc24afa522160b704b6dfe1d.diff
LOG: [X86] combineSelect - canonicalize (vXi1 bitcast(iX Cond)) with combineToExtendBoolVectorInReg before legalization
This replaces the attempt in 20af71f8ec47319d375a871db6fd3889c2487cbd to use combineToExtendBoolVectorInReg to create X86ISD::BLENDV masks directly; instead, we use it to canonicalize the iX bitcast to a sign-extended mask and then truncate it back to vXi1 before legalization breaks it apart.
Fixes #53760
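
For context, the affected tests build a <8 x i1> select mask by bitcasting a scalar i8 condition and then feed the selected operand into a vector binary op. Below is a minimal sketch of that shape in LLVM IR; the @cast_cond_sketch name is made up here, and the actual bodies in vector-bo-select.ll may differ in detail:

define <8 x i64> @cast_cond_sketch(i8 zeroext %pb, <8 x i64> %x, <8 x i64> %y) {
  ; The scalar condition bits become a per-lane <8 x i1> mask.
  %cond = bitcast i8 %pb to <8 x i1>
  ; False lanes take the identity value (zero for add).
  %masked = select <8 x i1> %cond, <8 x i64> %y, <8 x i64> zeroinitializer
  %r = add <8 x i64> %x, %masked
  ret <8 x i64> %r
}

Previously the <8 x i1> condition was split apart by type legalization, leaving the per-bit extract/insert sequences removed in the AVX2 check lines below; canonicalizing the bitcast to a sign-extended mask first lets the backend build the mask with a broadcast and compare instead.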
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-bo-select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d8c4e7b69c153..42a73cd7d8398 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43742,17 +43742,6 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
if (VT.is512BitVector())
return SDValue();
- // PreAVX512, without mask-registers, attempt to sign-extend bool vectors to
- // allow us to use BLENDV.
- if (!Subtarget.hasAVX512() && BitWidth == 1) {
- EVT CondVT = VT.changeVectorElementTypeToInteger();
- if (SDValue ExtCond = combineToExtendBoolVectorInReg(
- ISD::SIGN_EXTEND, SDLoc(N), CondVT, Cond, DAG, DCI, Subtarget)) {
- return DAG.getNode(X86ISD::BLENDV, SDLoc(N), VT, ExtCond,
- N->getOperand(1), N->getOperand(2));
- }
- }
-
// Don't optimize before the condition has been transformed to a legal type
// and don't ever optimize vector selects that map to AVX512 mask-registers.
if (BitWidth < 8 || BitWidth > 64)
@@ -44235,7 +44224,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// If this an avx512 target we can improve the use of zero masking by
// swapping the operands and inverting the condition.
if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
- Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
+ Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorAllZeros(LHS.getNode()) &&
!ISD::isBuildVectorAllZeros(RHS.getNode())) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
@@ -44244,6 +44233,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
}
+ // Attempt to convert a (vXi1 bitcast(iX Cond)) selection mask before it might
+ // get split by legalization.
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST &&
+ CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() &&
+ TLI.isTypeLegal(VT.getScalarType())) {
+ EVT ExtCondVT = VT.changeVectorElementTypeToInteger();
+ if (SDValue ExtCond = combineToExtendBoolVectorInReg(
+ ISD::SIGN_EXTEND, DL, ExtCondVT, Cond, DAG, DCI, Subtarget)) {
+ ExtCond = DAG.getNode(ISD::TRUNCATE, DL, CondVT, ExtCond);
+ return DAG.getSelect(DL, VT, ExtCond, LHS, RHS);
+ }
+ }
+
// Early exit check
if (!TLI.isTypeLegal(VT))
return SDValue();
diff --git a/llvm/test/CodeGen/X86/vector-bo-select.ll b/llvm/test/CodeGen/X86/vector-bo-select.ll
index 470d3eab16ea5..68348e0c1a52e 100644
--- a/llvm/test/CodeGen/X86/vector-bo-select.ll
+++ b/llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -540,51 +540,16 @@ define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
define <8 x double> @fadd_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
; AVX2-LABEL: fadd_v8f64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vaddpd %ymm3, %ymm1, %ymm1
@@ -609,8 +574,7 @@ define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -636,52 +600,15 @@ define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
define <8 x double> @fsub_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
; AVX2-LABEL: fsub_v8f64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vsubpd %ymm3, %ymm1, %ymm1
@@ -734,51 +661,16 @@ define <8 x float> @fmul_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
define <8 x double> @fmul_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
; AVX2-LABEL: fmul_v8f64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
; AVX2-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmulpd %ymm3, %ymm1, %ymm1
@@ -831,51 +723,16 @@ define <8 x float> @fdiv_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> nou
define <8 x double> @fdiv_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
; AVX2-LABEL: fdiv_v8f64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
; AVX2-NEXT: vdivpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivpd %ymm3, %ymm1, %ymm1
@@ -964,8 +821,7 @@ define <8 x i32> @add_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -992,52 +848,15 @@ define <8 x i32> @add_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
define <8 x i64> @add_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: add_v8i64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm3, %ymm1, %ymm1
@@ -1191,8 +1010,7 @@ define <8 x i32> @sub_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -1218,52 +1036,15 @@ define <8 x i32> @sub_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
define <8 x i64> @sub_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: sub_v8i64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm3, %ymm1, %ymm1
@@ -1386,51 +1167,16 @@ define <8 x i32> @mul_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
define <8 x i64> @mul_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: mul_v8i64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1,1,1,1]
; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm4
; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm4
@@ -1606,8 +1352,7 @@ define <8 x i32> @shl_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -1634,52 +1379,15 @@ define <8 x i32> @shl_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
define <8 x i64> @shl_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: shl_v8i64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllvq %ymm3, %ymm1, %ymm1
@@ -1833,8 +1541,7 @@ define <8 x i32> @lshr_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -1861,52 +1568,15 @@ define <8 x i32> @lshr_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
define <8 x i64> @lshr_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: lshr_v8i64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT: vpsrlvq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlvq %ymm3, %ymm1, %ymm1
@@ -2060,8 +1730,7 @@ define <8 x i32> @ashr_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -2088,52 +1757,15 @@ define <8 x i32> @ashr_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef
define <8 x i64> @ashr_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: ashr_v8i64_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: andb $1, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm4
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $3, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $4, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm4, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: andb $1, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX2-NEXT: shrb $7, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm5[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
-; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
-; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmovd %edi, %xmm4
+; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
-; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero
-; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
-; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
-; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpsrlvq %ymm2, %ymm4, %ymm5