[llvm] 20af71f - [X86] combineVSelectToBLENDV - handle vselect(vXi1,A,B) -> blendv(sext(vXi1),A,B)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 11 10:38:26 PST 2022
Author: Simon Pilgrim
Date: 2022-02-11T18:38:17Z
New Revision: 20af71f8ec47319d375a871db6fd3889c2487cbd
URL: https://github.com/llvm/llvm-project/commit/20af71f8ec47319d375a871db6fd3889c2487cbd
DIFF: https://github.com/llvm/llvm-project/commit/20af71f8ec47319d375a871db6fd3889c2487cbd.diff
LOG: [X86] combineVSelectToBLENDV - handle vselect(vXi1,A,B) -> blendv(sext(vXi1),A,B)
For pre-AVX512 targets, attempt to sign-extend a vXi1 condition mask so it can be passed to an X86ISD::BLENDV node.
Fixes Issue #53760
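For reference, the shape of IR that now reaches this combine looks roughly like the sketch below. This is a reconstruction from the test signatures in the diff; the function bodies are not part of this patch, so treat the exact operations as illustrative:

  define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
    ; Reinterpret the scalar mask as a bool vector: one i1 per lane.
    %b = bitcast i8 %pb to <8 x i1>
    %a = fadd <8 x float> %x, %y
    ; vselect with a vXi1 condition. Pre-AVX512 there are no mask
    ; registers, so this previously scalarised the mask bit by bit; the
    ; combine now sign-extends %b in-register (broadcast + bit-test, as
    ; in the updated CHECK lines below) and feeds the result to BLENDV.
    %r = select <8 x i1> %b, <8 x float> %a, <8 x float> %x
    ret <8 x float> %r
  }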
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-bo-select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e91f68425522f..28b1a415b7711 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43438,19 +43438,17 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
/// This function will also call SimplifyDemandedBits on already created
/// BLENDV to perform additional simplifications.
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);
if ((N->getOpcode() != ISD::VSELECT &&
N->getOpcode() != X86ISD::BLENDV) ||
ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
return SDValue();
- // Don't optimize before the condition has been transformed to a legal type
- // and don't ever optimize vector selects that map to AVX512 mask-registers.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned BitWidth = Cond.getScalarValueSizeInBits();
- if (BitWidth < 8 || BitWidth > 64)
- return SDValue();
+ EVT VT = N->getValueType(0);
// We can only handle the cases where VSELECT is directly legal on the
// subtarget. We custom lower VSELECT nodes with constant conditions and
@@ -43462,8 +43460,6 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
// Potentially, we should combine constant-condition vselect nodes
// pre-legalization into shuffles and not mark as many types as custom
// lowered.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = N->getValueType(0);
if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
return SDValue();
// FIXME: We don't support i16-element blends currently. We could and
@@ -43481,6 +43477,22 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
if (VT.is512BitVector())
return SDValue();
+ // PreAVX512, without mask-registers, attempt to sign-extend bool vectors to
+ // allow us to use BLENDV.
+ if (!Subtarget.hasAVX512() && BitWidth == 1) {
+ EVT CondVT = VT.changeVectorElementTypeToInteger();
+ if (SDValue ExtCond = combineToExtendBoolVectorInReg(
+ ISD::SIGN_EXTEND, SDLoc(N), CondVT, Cond, DAG, DCI, Subtarget)) {
+ return DAG.getNode(X86ISD::BLENDV, SDLoc(N), VT, ExtCond,
+ N->getOperand(1), N->getOperand(2));
+ }
+ }
+
+ // Don't optimize before the condition has been transformed to a legal type
+ // and don't ever optimize vector selects that map to AVX512 mask-registers.
+ if (BitWidth < 8 || BitWidth > 64)
+ return SDValue();
+
auto OnlyUsedAsSelectCond = [](SDValue Cond) {
for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
UI != UE; ++UI)
diff --git a/llvm/test/CodeGen/X86/vector-bo-select.ll b/llvm/test/CodeGen/X86/vector-bo-select.ll
index 74549f95a2625..a3ba1601c5f3f 100644
--- a/llvm/test/CodeGen/X86/vector-bo-select.ll
+++ b/llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -507,51 +507,11 @@ define <16 x float> @fdiv_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef
define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fadd_v8f32_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $4, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $7, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $3, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
@@ -644,52 +604,13 @@ define <8 x double> @fadd_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fsub_v8f32_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $4, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $7, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $3, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
-; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -780,51 +701,11 @@ define <8 x double> @fsub_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
define <8 x float> @fmul_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fmul_v8f32_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $4, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $7, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $3, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
@@ -917,51 +798,11 @@ define <8 x double> @fmul_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
define <8 x float> @fdiv_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fdiv_v8f32_cast_cond:
; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $4, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $7, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $3, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vdivps %ymm1, %ymm0, %ymm0