[llvm] [X86] Only fold AND/ANDNP back to VSELECT if we know the predicated mask select is legal (PR #156663)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 3 06:03:35 PDT 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/156663
By only checking type legality, we didn't account for 128/256-bit ops being run on non-AVX512VL targets, or vXi8/vXi16 ops being run on non-AVX512BW targets.
This check is cropping up in several places now, and I intend to hoist it out into a common helper, but this initial fix needs to be as clean as possible so that it can be backported to 21.X.
Fixes #156256
>From f6182e3b1b5e6549225512ed6a52a9ac4af1368d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 3 Sep 2025 14:00:11 +0100
Subject: [PATCH] [X86] Only fold AND/ANDNP back to VSELECT if we know the
predicated mask select is legal
By only checking type legality, we didn't account for 128/256-bit ops being run on non-AVX512VL targets, or vXi8/vXi16 ops being run on non-AVX512BW targets.
This check is cropping up in several places now, and I intend to hoist it out into a common helper, but this initial fix needs to be as clean as possible so that it can be backported to 21.X.
Fixes #156256
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++++
llvm/test/CodeGen/X86/pr156256.ll | 25 +++++++++++++++++++++++++
2 files changed, 29 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/pr156256.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 572cfdad3c93b..47cea933d0836 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51856,6 +51856,8 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue X, Y;
EVT CondVT = VT.changeVectorElementType(MVT::i1);
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(CondVT) &&
+ (VT.is512BitVector() || Subtarget.hasVLX()) &&
+ (VT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
sd_match(N, m_And(m_Value(X),
m_OneUse(m_SExt(m_AllOf(
m_Value(Y), m_SpecificVT(CondVT),
@@ -55420,6 +55422,8 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
if (Src.getOpcode() == ISD::SETCC && SrcVT.getScalarType() == MVT::i1 &&
+ (VT.is512BitVector() || Subtarget.hasVLX()) &&
+ (VT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
TLI.isTypeLegal(SrcVT) && N0.hasOneUse() && Src.hasOneUse())
return DAG.getSelect(DL, VT, DAG.getNOT(DL, Src, SrcVT), N1,
getZeroVector(VT, Subtarget, DAG, DL));
diff --git a/llvm/test/CodeGen/X86/pr156256.ll b/llvm/test/CodeGen/X86/pr156256.ll
new file mode 100644
index 0000000000000..13caa6fee5878
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr156256.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX512VL
+
+define <16 x i16> @PR156256(<16 x i32> %a, <16 x i32> %b) {
+; AVX512-LABEL: PR156256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; AVX512-NEXT: vpmovm2d %k0, %zmm0
+; AVX512-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX512VL-LABEL: PR156256:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; AVX512VL-NEXT: vpmovm2d %k0, %zmm0
+; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+ %icmp = icmp ugt <16 x i32> %a, %b
+ %sext = sext <16 x i1> %icmp to <16 x i16>
+ %and = and <16 x i16> %sext, splat (i16 16256)
+ ret <16 x i16> %and
+}
More information about the llvm-commits mailing list