[llvm] 86879d4 - [X86] Only fold AND/ANDNP back to VSELECT if we know the predicated mask select is legal (#156663)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 3 07:01:17 PDT 2025
Author: Simon Pilgrim
Date: 2025-09-03T15:01:12+01:00
New Revision: 86879d46f6476386dc07772ede83cd43b6ddd739
URL: https://github.com/llvm/llvm-project/commit/86879d46f6476386dc07772ede83cd43b6ddd739
DIFF: https://github.com/llvm/llvm-project/commit/86879d46f6476386dc07772ede83cd43b6ddd739.diff
LOG: [X86] Only fold AND/ANDNP back to VSELECT if we know the predicated mask select is legal (#156663)
By only checking type legality we didn't account for 128/256-bit ops
being run on non-AVX512VL targets, or vXi8/i16 ops being run on
non-AVX512BW targets.
This check is cropping up in several places now and I intend to hoist it
out into a common helper, but this initial fix needs to be as clean as
possible to be backported to 21.X.
Fixes #156256
Added:
llvm/test/CodeGen/X86/pr156256.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 572cfdad3c93b..47cea933d0836 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51856,6 +51856,8 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue X, Y;
EVT CondVT = VT.changeVectorElementType(MVT::i1);
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(CondVT) &&
+ (VT.is512BitVector() || Subtarget.hasVLX()) &&
+ (VT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
sd_match(N, m_And(m_Value(X),
m_OneUse(m_SExt(m_AllOf(
m_Value(Y), m_SpecificVT(CondVT),
@@ -55420,6 +55422,8 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
if (Src.getOpcode() == ISD::SETCC && SrcVT.getScalarType() == MVT::i1 &&
+ (VT.is512BitVector() || Subtarget.hasVLX()) &&
+ (VT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
TLI.isTypeLegal(SrcVT) && N0.hasOneUse() && Src.hasOneUse())
return DAG.getSelect(DL, VT, DAG.getNOT(DL, Src, SrcVT), N1,
getZeroVector(VT, Subtarget, DAG, DL));
diff --git a/llvm/test/CodeGen/X86/pr156256.ll b/llvm/test/CodeGen/X86/pr156256.ll
new file mode 100644
index 0000000000000..13caa6fee5878
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr156256.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX512VL
+
+define <16 x i16> @PR156256(<16 x i32> %a, <16 x i32> %b) {
+; AVX512-LABEL: PR156256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; AVX512-NEXT: vpmovm2d %k0, %zmm0
+; AVX512-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: PR156256:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; AVX512VL-NEXT: vpmovm2d %k0, %zmm0
+; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+ %icmp = icmp ugt <16 x i32> %a, %b
+ %sext = sext <16 x i1> %icmp to <16 x i16>
+ %and = and <16 x i16> %sext, splat (i16 16256)
+ ret <16 x i16> %and
+}
More information about the llvm-commits
mailing list