[llvm] [DAG] combineVSelectWithAllOnesOrZeros - fold select Cond, 0, x -> and not(Cond), x (PR #147472)

Tue Jul 8 00:48:15 PDT 2025

https://github.com/woruyu created https://github.com/llvm/llvm-project/pull/147472

### Summary
This patch extends the work from [#145298](https://github.com/llvm/llvm-project/pull/145298) by adding the fold `select Cond, 0, x -> and not(Cond), x` to the target-independent `combineVSelectWithAllOnesOrZeros`, and by removing the now-unnecessary X86-specific `combineVSelectWithLastZeros` logic. That combine is now handled correctly, and more generally, by the target-independent code.

This simplifies the X86 DAG combine logic and avoids duplication.

Fixes: [#144513](https://github.com/llvm/llvm-project/issues/144513)
Related: [#146831](https://github.com/llvm/llvm-project/pull/146831)

>From a6245830a2c7c0c6e846c9f10583016b2d8429e5 Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Tue, 8 Jul 2025 15:25:02 +0800
Subject: [PATCH] [DAG] combineVSelectWithAllOnesOrZeros - fold select Cond, 0,
 x -> and not(Cond), x

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 +++++-
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 54 -------------------
 .../AArch64/sve-fixed-length-shuffles.ll      | 29 ++++------
 .../test/CodeGen/AArch64/vselect-constants.ll |  6 +--
 4 files changed, 26 insertions(+), 79 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e7f1fdf10719a..4f06e45cdd0c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9902,11 +9902,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     if (SDValue Combined = visitADDLike(N))
       return Combined;
 
-  // fold !(x cc y) -> (x !cc y)
+  // fold xor (setcc x y cc) -1 -> setcc x y !cc
+  // Avoid breaking: and (xor (setcc x y cc) -1) z -> andn for vec
   unsigned N0Opcode = N0.getOpcode();
   SDValue LHS, RHS, CC;
   if (TLI.isConstTrueVal(N1) &&
-      isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
+      isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
+      !(N->hasOneUse() && TLI.hasAndNot(SDValue(N, 0)) &&
+        N->use_begin()->getUser()->getOpcode() == ISD::AND && VT.isVector())) {
     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                                LHS.getValueType());
     if (!LegalOperations ||
@@ -13165,6 +13168,15 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
     return DAG.getBitcast(VT, And);
   }
 
+  // select Cond, 0, x -> and not(Cond), x
+  if (IsTAllZero &&
+      (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
+    SDValue X = DAG.getBitcast(CondVT, FVal);
+    SDValue And =
+        DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
+    return DAG.getBitcast(VT, And);
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fae80f25f71d1..fd617f7062313 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47260,57 +47260,6 @@ static SDValue combineToExtendBoolVectorInReg(
                      DAG.getConstant(EltSizeInBits - 1, DL, VT));
 }
 
-/// If a vector select has an left operand that is 0, try to simplify the
-/// select to a bitwise logic operation.
-/// TODO: Move to DAGCombiner.combineVSelectWithAllOnesOrZeros, possibly using
-/// TargetLowering::hasAndNot()?
-static SDValue combineVSelectWithLastZeros(SDNode *N, SelectionDAG &DAG,
-                                           const SDLoc &DL,
-                                           TargetLowering::DAGCombinerInfo &DCI,
-                                           const X86Subtarget &Subtarget) {
-  SDValue Cond = N->getOperand(0);
-  SDValue LHS = N->getOperand(1);
-  SDValue RHS = N->getOperand(2);
-  EVT VT = LHS.getValueType();
-  EVT CondVT = Cond.getValueType();
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
-  if (N->getOpcode() != ISD::VSELECT)
-    return SDValue();
-
-  assert(CondVT.isVector() && "Vector select expects a vector selector!");
-
-  // To use the condition operand as a bitwise mask, it must have elements that
-  // are the same size as the select elements. Ie, the condition operand must
-  // have already been promoted from the IR select condition type <N x i1>.
-  // Don't check if the types themselves are equal because that excludes
-  // vector floating-point selects.
-  if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
-    return SDValue();
-
-  // Cond value must be 'sign splat' to be converted to a logical op.
-  if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
-    return SDValue();
-
-  if (!TLI.isTypeLegal(CondVT))
-    return SDValue();
-
-  // vselect Cond, 000..., X -> andn Cond, X
-  if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
-    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
-    SDValue AndN;
-    // The canonical form differs for i1 vectors - x86andnp is not used
-    if (CondVT.getScalarType() == MVT::i1)
-      AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
-                         CastRHS);
-    else
-      AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
-    return DAG.getBitcast(VT, AndN);
-  }
-
-  return SDValue();
-}
-
 /// If both arms of a vector select are concatenated vectors, split the select,
 /// and concatenate the result to eliminate a wide (256-bit) vector instruction:
 ///   vselect Cond, (concat T0, T1), (concat F0, F1) -->
@@ -48052,9 +48001,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget))
     return SDValue();
 
-  if (SDValue V = combineVSelectWithLastZeros(N, DAG, DL, DCI, Subtarget))
-    return V;
-
   if (SDValue V = combineVSelectToBLENDV(N, DAG, DL, DCI, Subtarget))
     return V;
 
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index d916f26f9b26b..c48ee3939bd2e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -30,7 +30,9 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
 ; CHECK-NEXT:  // %bb.1: // %vector.body
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ldr z4, [x0]
+; CHECK-NEXT:    ldr z5, [x0, #2, mul vl]
+; CHECK-NEXT:    ldr z6, [x0, #3, mul vl]
 ; CHECK-NEXT:    umov w8, v0.b[8]
 ; CHECK-NEXT:    mov v1.b[1], v0.b[1]
 ; CHECK-NEXT:    fmov s2, w8
@@ -60,31 +62,20 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
 ; CHECK-NEXT:    asr z1.s, z1.s, #31
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
-; CHECK-NEXT:    and z1.s, z1.s, #0x1
+; CHECK-NEXT:    bic z1.d, z4.d, z1.d
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    ldr z4, [x0, #1, mul vl]
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
-; CHECK-NEXT:    cmpne p1.s, p0/z, z1.s, #0
-; CHECK-NEXT:    ldr z1, [x0]
+; CHECK-NEXT:    str z1, [x0]
 ; CHECK-NEXT:    lsl z3.s, z3.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
-; CHECK-NEXT:    and z0.s, z0.s, #0x1
+; CHECK-NEXT:    bic z0.d, z5.d, z0.d
 ; CHECK-NEXT:    asr z3.s, z3.s, #31
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
-; CHECK-NEXT:    mov z1.s, p1/m, #0 // =0x0
-; CHECK-NEXT:    cmpne p2.s, p0/z, z0.s, #0
-; CHECK-NEXT:    ldr z0, [x0, #2, mul vl]
-; CHECK-NEXT:    and z3.s, z3.s, #0x1
-; CHECK-NEXT:    str z1, [x0]
-; CHECK-NEXT:    cmpne p3.s, p0/z, z3.s, #0
-; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT:    ldr z3, [x0, #3, mul vl]
-; CHECK-NEXT:    ldr z2, [x0, #1, mul vl]
-; CHECK-NEXT:    mov z0.s, p2/m, #0 // =0x0
-; CHECK-NEXT:    mov z3.s, p3/m, #0 // =0x0
-; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    bic z1.d, z4.d, z2.d
 ; CHECK-NEXT:    str z0, [x0, #2, mul vl]
+; CHECK-NEXT:    bic z3.d, z6.d, z3.d
+; CHECK-NEXT:    str z1, [x0, #1, mul vl]
 ; CHECK-NEXT:    str z3, [x0, #3, mul vl]
-; CHECK-NEXT:    str z2, [x0, #1, mul vl]
 ; CHECK-NEXT:  .LBB1_2: // %exit
 ; CHECK-NEXT:    ret
   %broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll
index fe125c9626ea3..3c1f06e0e4ed1 100644
--- a/llvm/test/CodeGen/AArch64/vselect-constants.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll
@@ -169,11 +169,9 @@ define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
 ; CHECK-LABEL: sel_0_or_1_vec:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    shl v0.4s, v0.4s, #31
-; CHECK-NEXT:    cmge v0.4s, v0.4s, #0
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   ret <4 x i32> %add