[llvm] 7c25db3 - [DAG] Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z))). (#141476)

via llvm-commits <llvm-commits at lists.llvm.org>
Mon Jun 16 07:55:29 PDT 2025


Author: Xu Zhang
Date: 2025-06-16T15:55:26+01:00
New Revision: 7c25db3fbfc63f76b270940e341f267e497e95d9

URL: https://github.com/llvm/llvm-project/commit/7c25db3fbfc63f76b270940e341f267e497e95d9
DIFF: https://github.com/llvm/llvm-project/commit/7c25db3fbfc63f76b270940e341f267e497e95d9.diff

LOG: [DAG] Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z))). (#141476)

Fixes #140639

---------

Co-authored-by: Simon Pilgrim <llvm-dev at redking.me.uk>
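
Both folds are bit-exact: in two's complement, ~Y == -Y - 1, so

    ~Y + Z == (Z - Y) - 1 == -(Y - Z) - 1 == ~(Y - Z)
    ~Y - Z == -(Y + Z) - 1 == ~(Y + Z)

which is why the add under the NOT becomes a sub, and vice versa.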

Added: 
    llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded171f4f..f6d811ddba8ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -396,6 +396,8 @@ namespace {
     bool PromoteLoad(SDValue Op);
 
     SDValue foldShiftToAvg(SDNode *N);
+    // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
+    SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
 
     SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                 SDValue RHS, SDValue True, SDValue False,
@@ -7541,6 +7543,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return DAG.getNode(ISD::AND, DL, VT, X,
                          DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
 
+  // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
+  // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
+  if (TLI.hasAndNot(SDValue(N, 0)))
+    if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
+      return Folded;
+
   // Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
   // If we are shifting down an extended sign bit, see if we can simplify
   // this to shifting the MSB directly to expose further simplifications.
@@ -11652,6 +11660,22 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
   return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
 }
 
+SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
+  unsigned Opc = N->getOpcode();
+  SDValue X, Y, Z;
+  if (sd_match(
+          N, m_BitwiseLogic(m_Value(X), m_Add(m_Not(m_Value(Y)), m_Value(Z)))))
+    return DAG.getNode(Opc, DL, VT, X,
+                       DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
+
+  if (sd_match(N, m_BitwiseLogic(m_Value(X), m_Sub(m_OneUse(m_Not(m_Value(Y))),
+                                                   m_Value(Z)))))
+    return DAG.getNode(Opc, DL, VT, X,
+                       DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
+
+  return SDValue();
+}
+
 /// Generate Min/Max node
 SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                          SDValue RHS, SDValue True,

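For reference, the new combine is gated on TLI.hasAndNot(SDValue(N, 0)), so
it only fires on targets with a native and-not instruction (AArch64's BIC,
LoongArch's ANDN). A target typically opts in by overriding the hasAndNot
hook; a minimal sketch follows, where MyTargetLowering is a hypothetical
target's lowering class and not part of this commit:

    // Sketch: claim and-not support only for legal scalar integer types,
    // so the DAGCombiner fold above can fire for this target.
    bool MyTargetLowering::hasAndNot(SDValue Y) const {
      EVT VT = Y.getValueType();
      return VT.isScalarInteger() && isTypeLegal(VT);
    }
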
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
new file mode 100644
index 0000000000000..5fbf38b2560d4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-linux | FileCheck %s
+
+define i8 @andnot_add_with_neg_i8(i8 %a0, i8 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i8 %a0, -1
+  %sum = add i8 %not, %a1
+  %and = and i8 %sum, %a0
+  ret i8 %and
+}
+
+define i8 @andnot_sub_with_neg_i8(i8 %a0, i8 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i8 %a0, -1
+  %diff = sub i8 %not, %a1
+  %and = and i8 %diff, %a0
+  ret i8 %and
+}
+
+define i16 @andnot_add_with_neg_i16(i16 %a0, i16 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i16 %a0, -1
+  %sum = add i16 %not, %a1
+  %and = and i16 %sum, %a0
+  ret i16 %and
+}
+
+define i16 @andnot_sub_with_neg_i16(i16 %a0, i16 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i16 %a0, -1
+  %diff = sub i16 %not, %a1
+  %and = and i16 %diff, %a0
+  ret i16 %and
+}
+
+define i32 @andnot_add_with_neg_i32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i32 %a0, -1
+  %sum = add i32 %not, %a1
+  %and = and i32 %sum, %a0
+  ret i32 %and
+}
+
+define i32 @andnot_sub_with_neg_i32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    bic w0, w0, w8
+; CHECK-NEXT:    ret
+  %not = xor i32 %a0, -1
+  %diff = sub i32 %not, %a1
+  %and = and i32 %diff, %a0
+  ret i32 %and
+}
+
+define i64 @andnot_add_with_neg_i64(i64 %a0, i64 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x0, x1
+; CHECK-NEXT:    bic x0, x0, x8
+; CHECK-NEXT:    ret
+  %not = xor i64 %a0, -1
+  %sum = add i64 %not, %a1
+  %and = and i64 %sum, %a0
+  ret i64 %and
+}
+
+define i64 @andnot_sub_with_neg_i64(i64 %a0, i64 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x1
+; CHECK-NEXT:    bic x0, x0, x8
+; CHECK-NEXT:    ret
+  %not = xor i64 %a0, -1
+  %diff = sub i64 %not, %a1
+  %and = and i64 %diff, %a0
+  ret i64 %and
+}

diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index e564d7bddea6f..27be02c50f1c7 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -885,9 +885,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 define i8 @test_not_cttz_i8(i8 %a) nounwind {
 ; LA32R-LABEL: test_not_cttz_i8:
 ; LA32R:       # %bb.0:
-; LA32R-NEXT:    nor $a1, $a0, $zero
-; LA32R-NEXT:    addi.w $a1, $a1, -1
-; LA32R-NEXT:    and $a0, $a0, $a1
+; LA32R-NEXT:    addi.w $a1, $a0, 1
+; LA32R-NEXT:    andn $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    andi $a1, $a1, 85
 ; LA32R-NEXT:    sub.w $a0, $a0, $a1
@@ -921,9 +920,8 @@ define i8 @test_not_cttz_i8(i8 %a) nounwind {
 define i16 @test_not_cttz_i16(i16 %a) nounwind {
 ; LA32R-LABEL: test_not_cttz_i16:
 ; LA32R:       # %bb.0:
-; LA32R-NEXT:    nor $a1, $a0, $zero
-; LA32R-NEXT:    addi.w $a1, $a1, -1
-; LA32R-NEXT:    and $a0, $a0, $a1
+; LA32R-NEXT:    addi.w $a1, $a0, 1
+; LA32R-NEXT:    andn $a0, $a0, $a1
 ; LA32R-NEXT:    srli.w $a1, $a0, 1
 ; LA32R-NEXT:    lu12i.w $a2, 5
 ; LA32R-NEXT:    ori $a2, $a2, 1365

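The LoongArch update is the same fold in action: taking Y == a and Z == -1
(the addi.w $a1, $a1, -1 is an add of -1 to ~a), we get

    ~a + (-1) == ~(a - (-1)) == ~(a + 1)

so the old nor/addi.w/and sequence collapses to an addi.w plus a single andn.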
