[llvm] [X86] combineSelect - relax "vselect (X & C == 0), LHS, RHS" --> "vselect (X & C != 0), RHS, LHS" type limitation (PR #173366)

Tue Dec 23 04:12:40 PST 2025

https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/173366

The comparison type doesn't have to match the select type - the two just need to have the same vector element width (this allows an integer comparison to select floating-point results).

From 387c0119c837fe30b8fc159073093540a936beb6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 23 Dec 2025 12:11:58 +0000
Subject: [PATCH] [X86] combineSelect - relax "vselect (X & C == 0), LHS, RHS"
 --> "vselect (X & C != 0), RHS, LHS" type limitation

The comparison type doesn't have to match the select type - the two just need to have the same vector element width (this allows an integer comparison to select floating-point results).
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  4 ++--
 llvm/test/CodeGen/X86/vselect-pcmp.ll   | 25 +++++++------------------
 2 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1fdf39839c757..811ffb090d751 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47914,6 +47914,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   EVT CondVT = Cond.getValueType();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   bool CondConstantVector = ISD::isBuildVectorOfConstantSDNodes(Cond.getNode());
+  unsigned EltBitWidth = VT.getScalarSizeInBits();
 
   // Attempt to combine (select M, (sub 0, X), X) -> (sub (xor X, M), M).
   // Limit this to cases of non-constant masks that createShuffleMaskFromVSELECT
@@ -48439,7 +48440,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
       Cond.getOperand(0).getOpcode() == ISD::AND &&
       isNullOrNullSplat(Cond.getOperand(1)) &&
       cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
-      Cond.getOperand(0).getValueType() == VT) {
+      Cond.getOperand(0).getScalarValueSizeInBits() == EltBitWidth) {
     // The 'and' mask must be composed of power-of-2 constants.
     SDValue And = Cond.getOperand(0);
     auto *C = isConstOrConstSplat(And.getOperand(1));
@@ -48453,7 +48454,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     // If we have a non-splat but still powers-of-2 mask, AVX1 can use pmulld
     // and AVX2 can use vpsllv{dq}. 8-bit lacks a proper shift or multiply.
     // 16-bit lacks a proper blendv.
-    unsigned EltBitWidth = VT.getScalarSizeInBits();
     bool CanShiftBlend =
         TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) ||
                                 (Subtarget.hasAVX2() && EltBitWidth == 64) ||
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
index 8e6c4d83c7617..b9fdf71c62d2d 100644
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -544,23 +544,12 @@ define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x
   ret <4 x i64> %r
 }
 
-; FIXME: use PSLLD(Z,31) like blend_splat1_mask_cond_v4i32
 define <4 x float> @blend_splat1_mask_cond_v4f32(<4 x i32> %x, <4 x float> %y, <4 x float> %z) {
-; AVX1-LABEL: blend_splat1_mask_cond_v4f32:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: blend_splat1_mask_cond_v4f32:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1]
-; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; AVX2-NEXT:    retq
+; AVX12-LABEL: blend_splat1_mask_cond_v4f32:
+; AVX12:       # %bb.0:
+; AVX12-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX12-NEXT:    retq
 ;
 ; AVX512F-LABEL: blend_splat1_mask_cond_v4f32:
 ; AVX512F:       # %bb.0:
@@ -583,8 +572,8 @@ define <4 x float> @blend_splat1_mask_cond_v4f32(<4 x i32> %x, <4 x float> %y, <
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; XOP-NEXT:    vpcomeqd %xmm3, %xmm0, %xmm0
-; XOP-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
+; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
 ; XOP-NEXT:    retq
   %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %c = icmp eq <4 x i32> %a, zeroinitializer