[llvm] fd8a4b0 - [X86] combineAndnp - fold ANDN(SEXT(SETCC()),X) -> SELECT(NOT(SETCC()),X,0) on AVX512 targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 06:54:22 PDT 2024
Author: Simon Pilgrim
Date: 2024-10-14T14:54:03+01:00
New Revision: fd8a4b007330c214fc9cc6e2c255cc18fc3c6b0c
URL: https://github.com/llvm/llvm-project/commit/fd8a4b007330c214fc9cc6e2c255cc18fc3c6b0c
DIFF: https://github.com/llvm/llvm-project/commit/fd8a4b007330c214fc9cc6e2c255cc18fc3c6b0c.diff
LOG: [X86] combineAndnp - fold ANDN(SEXT(SETCC()),X) -> SELECT(NOT(SETCC()),X,0) on AVX512 targets
Reverse the generic foldVSelectToSignBitSplatMask fold on AVX512 targets where we can use the SETCC result directly in predicated moves/instructions.
Fixes #109272
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4c16d2eaac4cd2..e1ce5bbc7939d9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54021,6 +54021,7 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getSimpleValueType(0);
int NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(N);
// ANDNP(undef, x) -> 0
@@ -54044,6 +54045,18 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
if (SDValue Not = IsNOT(N0, DAG))
return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
+ // On AVX512 targets, attempt to reverse foldVSelectToSignBitSplatMask
+ // to make use of predicated selects.
+ // ANDN(SEXT(SETCC()),X) -> SELECT(NOT(SETCC()),X,0)
+ if (DCI.isAfterLegalizeDAG() && N0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (Src.getOpcode() == ISD::SETCC && SrcVT.getScalarType() == MVT::i1 &&
+ TLI.isTypeLegal(SrcVT) && N0.hasOneUse() && Src.hasOneUse())
+ return DAG.getSelect(DL, VT, DAG.getNOT(DL, Src, SrcVT), N1,
+ getZeroVector(VT, Subtarget, DAG, DL));
+ }
+
// Constant Folding
APInt Undefs0, Undefs1;
SmallVector<APInt> EltBits0, EltBits1;
@@ -54110,7 +54123,6 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
std::tie(Bits0, Elts0) = GetDemandedMasks(N1);
std::tie(Bits1, Elts1) = GetDemandedMasks(N0, true);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedVectorElts(N0, Elts0, DCI) ||
TLI.SimplifyDemandedVectorElts(N1, Elts1, DCI) ||
TLI.SimplifyDemandedBits(N0, Bits0, Elts0, DCI) ||
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
index 61814b48e6b3a3..32cfb9071f5c47 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -153,8 +153,8 @@ define <64 x i8> @combine_vpermi2var_v64i8_with_mask(<64 x i8> %a0, <64 x i8> %a
; CHECK: # %bb.0:
; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vpmovb2m %zmm1, %k0
-; CHECK-NEXT: vpmovm2b %k0, %zmm1
-; CHECK-NEXT: vpandnq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: knotq %k0, %k1
+; CHECK-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: ret{{[l|q]}}
%perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2)
%cmp = icmp slt <64 x i8> %a1, zeroinitializer
More information about the llvm-commits mailing list