[llvm] [X86] combineSelect - relax "vselect (X & C == 0), LHS, RHS" --> "vselect (X & C != 0), RHS, LHS" type limitation (PR #173366)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 23 04:12:40 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/173366
The comparison type doesn't have to match the select type - the two only need to have the same vector element width (this allows an integer comparison to select between floating-point results).
From 387c0119c837fe30b8fc159073093540a936beb6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 23 Dec 2025 12:11:58 +0000
Subject: [PATCH] [X86] combineSelect - relax "vselect (X & C == 0), LHS, RHS"
--> "vselect (X & C != 0), RHS, LHS" type limitation
The comparison type doesn't have to match the select type - the two only need to have the same vector element width (this allows an integer comparison to select between floating-point results).
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
llvm/test/CodeGen/X86/vselect-pcmp.ll | 25 +++++++------------------
2 files changed, 9 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1fdf39839c757..811ffb090d751 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47914,6 +47914,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool CondConstantVector = ISD::isBuildVectorOfConstantSDNodes(Cond.getNode());
+ unsigned EltBitWidth = VT.getScalarSizeInBits();
// Attempt to combine (select M, (sub 0, X), X) -> (sub (xor X, M), M).
// Limit this to cases of non-constant masks that createShuffleMaskFromVSELECT
@@ -48439,7 +48440,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
Cond.getOperand(0).getOpcode() == ISD::AND &&
isNullOrNullSplat(Cond.getOperand(1)) &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
- Cond.getOperand(0).getValueType() == VT) {
+ Cond.getOperand(0).getScalarValueSizeInBits() == EltBitWidth) {
// The 'and' mask must be composed of power-of-2 constants.
SDValue And = Cond.getOperand(0);
auto *C = isConstOrConstSplat(And.getOperand(1));
@@ -48453,7 +48454,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// If we have a non-splat but still powers-of-2 mask, AVX1 can use pmulld
// and AVX2 can use vpsllv{dq}. 8-bit lacks a proper shift or multiply.
// 16-bit lacks a proper blendv.
- unsigned EltBitWidth = VT.getScalarSizeInBits();
bool CanShiftBlend =
TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) ||
(Subtarget.hasAVX2() && EltBitWidth == 64) ||
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
index 8e6c4d83c7617..b9fdf71c62d2d 100644
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -544,23 +544,12 @@ define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x
ret <4 x i64> %r
}
-; FIXME: use PSLLD(Z,31) like blend_splat1_mask_cond_v4i32
define <4 x float> @blend_splat1_mask_cond_v4f32(<4 x i32> %x, <4 x float> %y, <4 x float> %z) {
-; AVX1-LABEL: blend_splat1_mask_cond_v4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
-; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: blend_splat1_mask_cond_v4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1]
-; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
-; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
-; AVX2-NEXT: retq
+; AVX12-LABEL: blend_splat1_mask_cond_v4f32:
+; AVX12: # %bb.0:
+; AVX12-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX12-NEXT: retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v4f32:
; AVX512F: # %bb.0:
@@ -583,8 +572,8 @@ define <4 x float> @blend_splat1_mask_cond_v4f32(<4 x i32> %x, <4 x float> %y, <
; XOP: # %bb.0:
; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; XOP-NEXT: vpcomeqd %xmm3, %xmm0, %xmm0
-; XOP-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
+; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT: retq
%a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%c = icmp eq <4 x i32> %a, zeroinitializer
More information about the llvm-commits mailing list