[llvm] [DAG] Optimize Constant Xor And And Not Operation (PR #161784)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 14 20:06:41 PDT 2025


https://github.com/manik-muk updated https://github.com/llvm/llvm-project/pull/161784

>From af5dcb54f7888cbc07183f6be852f29e321afc6b Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Fri, 3 Oct 2025 01:03:52 -0400
Subject: [PATCH 1/9] added optimization and tests

---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 63 +++++++++++++++++++
 .../CodeGen/X86/constant-xor-and-andnot.ll    | 63 +++++++++++++++++++
 llvm/test/CodeGen/X86/pr108731.ll             | 12 ++--
 3 files changed, 132 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/constant-xor-and-andnot.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cdc97faf394ca..6c562ccf5b363 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,6 +51541,64 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+/// This allows the andn operation to be done in parallel with the xor
+static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL,
+                                           SelectionDAG &DAG,
+                                           const X86Subtarget &Subtarget) {
+  using namespace llvm::SDPatternMatch;
+
+  EVT VT = N->getValueType(0);
+  // Only handle scalar integer types that support BMI instructions
+  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Check if N0 is AND(XOR(Constant, a), b)
+  if (N0.getOpcode() != ISD::AND)
+    return SDValue();
+
+  SDValue AndLHS = N0.getOperand(0);
+  SDValue AndRHS = N0.getOperand(1);
+
+  // Check if one operand is XOR(Constant, a)
+  SDValue XorOp, OtherOp;
+  if (AndLHS.getOpcode() == ISD::XOR) {
+    XorOp = AndLHS;
+    OtherOp = AndRHS;
+  } else if (AndRHS.getOpcode() == ISD::XOR) {
+    XorOp = AndRHS;
+    OtherOp = AndLHS;
+  } else {
+    return SDValue();
+  }
+
+  // Check if XOR has a constant operand
+  if (!isa<ConstantSDNode>(XorOp.getOperand(0)) &&
+      !isa<ConstantSDNode>(XorOp.getOperand(1))) {
+    return SDValue();
+  }
+
+  // Check if N1 is NOT(c) - i.e., XOR(c, -1)
+  SDValue NotOp;
+  if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) {
+    NotOp = N1.getOperand(0);
+  } else {
+    return SDValue();
+  }
+
+  // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+  // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+  // This allows the andn (b & ~c) to be done in parallel with the xor
+
+  // Create AND(b, NOT(c)) - this will become andn
+  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
+  // Create final AND(XOR(Constant, a), AND(b, NOT(c)))
+  return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+}
+
 /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
 static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
                                        SelectionDAG &DAG,
@@ -51833,6 +51891,11 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG))
     return R;
 
+  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+  // This allows the andn operation to be done in parallel with the xor
+  if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget))
+    return R;
+
   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.
   // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
new file mode 100644
index 0000000000000..5a4d931d29896
--- /dev/null
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s
+
+; Test the optimization described in issue #161630:
+; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
+
+define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT:    andnq %rsi, %rdx, %rax
+; CHECK-NEXT:    andq %rdi, %rax
+; CHECK-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %xor, %b
+  %not_c = xor i64 %c, -1
+  %result = and i64 %and1, %not_c
+  ret i64 %result
+}
+
+define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl $5678, %edi # imm = 0x162E
+; CHECK-NEXT:    andnl %esi, %edx, %eax
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    retq
+  %xor = xor i32 %a, 5678
+  %and1 = and i32 %xor, %b
+  %not_c = xor i32 %c, -1
+  %result = and i32 %and1, %not_c
+  ret i32 %result
+}
+
+; Test with different operand order
+define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT:    andnq %rsi, %rdx, %rax
+; CHECK-NEXT:    andq %rdi, %rax
+; CHECK-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %b, %xor
+  %not_c = xor i64 %c, -1
+  %result = and i64 %and1, %not_c
+  ret i64 %result
+}
+
+; Test with different operand order for the final AND
+define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT:    andq %rsi, %rdi
+; CHECK-NEXT:    andnq %rdi, %rdx, %rax
+; CHECK-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %xor, %b
+  %not_c = xor i64 %c, -1
+  %result = and i64 %not_c, %and1
+  ret i64 %result
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
index 2983d108eaedd..bda90117a1be4 100644
--- a/llvm/test/CodeGen/X86/pr108731.ll
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) {
 ; BMI-LABEL: test_i64:
 ; BMI:       # %bb.0: # %Entry
 ; BMI-NEXT:    andq %rdx, %rsi
-; BMI-NEXT:    andnq %rdi, %rsi, %rax
-; BMI-NEXT:    andnq %rcx, %rdx, %rcx
-; BMI-NEXT:    andnq %rax, %rcx, %rax
+; BMI-NEXT:    andnq %rcx, %rdx, %rax
+; BMI-NEXT:    andnq %rdi, %rax, %rax
+; BMI-NEXT:    andnq %rax, %rsi, %rax
 ; BMI-NEXT:    retq
 Entry:
   %and1 = and i64 %y, %x
@@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) {
 ; BMI-LABEL: test_i32:
 ; BMI:       # %bb.0: # %Entry
 ; BMI-NEXT:    andl %edx, %esi
-; BMI-NEXT:    andnl %edi, %esi, %eax
-; BMI-NEXT:    andnl %ecx, %edx, %ecx
-; BMI-NEXT:    andnl %eax, %ecx, %eax
+; BMI-NEXT:    andnl %ecx, %edx, %eax
+; BMI-NEXT:    andnl %edi, %eax, %eax
+; BMI-NEXT:    andnl %eax, %esi, %eax
 ; BMI-NEXT:    retq
 Entry:
   %and1 = and i32 %y, %x

>From b176fd6e56d22c8f06190246ab5b5a2871776060 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 16:32:10 -0400
Subject: [PATCH 2/9] Move constant XOR AND ANDNOT optimization to generic DAG
 combiner

This moves the optimization from X86-specific code to the generic
reassociateOpsCommutative function in DAGCombiner.cpp. The optimization
transforms (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
to allow ANDNOT operations to be done in parallel with XOR operations.

This benefits all targets that have ANDNOT instructions (X86 BMI, ARM BIC,
RISC-V, etc.) rather than being limited to X86 only.

- Remove X86-specific combineConstantXorAndAndNot function
- Add generic optimization to reassociateOpsCommutative with TLI.hasAndNot check
- Update test expectations for the new optimized output
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 +++++++++++
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 62 -------------------
 .../CodeGen/X86/constant-xor-and-andnot.ll    |  4 +-
 3 files changed, 37 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1ef2b35952833..4241019d47ec1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1262,6 +1262,41 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
     if (N1 == N00 || N1 == N01)
       return N0;
   }
+
+  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+  // This allows the andn operation to be done in parallel with the xor
+  if (Opc == ISD::AND && TLI.hasAndNot(N1)) {
+    // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
+    // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
+    
+    // Check if N1 is NOT(c) - i.e., XOR(c, -1)
+    if (N1.getOpcode() == ISD::XOR && 
+        DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+        isAllOnesConstant(N1.getOperand(1))) {
+      
+      // Check if one operand of N0 is XOR(Constant, a)
+      SDValue XorOp, OtherOp;
+      if (N00.getOpcode() == ISD::XOR) {
+        XorOp = N00;
+        OtherOp = N01;
+      } else if (N01.getOpcode() == ISD::XOR) {
+        XorOp = N01;
+        OtherOp = N00;
+      } else {
+        return SDValue();
+      }
+      
+      // Check if XOR has a constant operand
+      if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) ||
+          DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) {
+        // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+        // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+        // This allows the andn (b & ~c) to be done in parallel with the xor
+        SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
+        return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+      }
+    }
+  }
   if (Opc == ISD::XOR) {
     // (N00 ^ N01) ^ N00 --> N01
     if (N1 == N00)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6c562ccf5b363..d32cb680594c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,63 +51541,6 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
-/// This allows the andn operation to be done in parallel with the xor
-static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL,
-                                           SelectionDAG &DAG,
-                                           const X86Subtarget &Subtarget) {
-  using namespace llvm::SDPatternMatch;
-
-  EVT VT = N->getValueType(0);
-  // Only handle scalar integer types that support BMI instructions
-  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
-    return SDValue();
-
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-
-  // Check if N0 is AND(XOR(Constant, a), b)
-  if (N0.getOpcode() != ISD::AND)
-    return SDValue();
-
-  SDValue AndLHS = N0.getOperand(0);
-  SDValue AndRHS = N0.getOperand(1);
-
-  // Check if one operand is XOR(Constant, a)
-  SDValue XorOp, OtherOp;
-  if (AndLHS.getOpcode() == ISD::XOR) {
-    XorOp = AndLHS;
-    OtherOp = AndRHS;
-  } else if (AndRHS.getOpcode() == ISD::XOR) {
-    XorOp = AndRHS;
-    OtherOp = AndLHS;
-  } else {
-    return SDValue();
-  }
-
-  // Check if XOR has a constant operand
-  if (!isa<ConstantSDNode>(XorOp.getOperand(0)) &&
-      !isa<ConstantSDNode>(XorOp.getOperand(1))) {
-    return SDValue();
-  }
-
-  // Check if N1 is NOT(c) - i.e., XOR(c, -1)
-  SDValue NotOp;
-  if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) {
-    NotOp = N1.getOperand(0);
-  } else {
-    return SDValue();
-  }
-
-  // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
-  // To: AND(XOR(Constant, a), AND(b, NOT(c)))
-  // This allows the andn (b & ~c) to be done in parallel with the xor
-
-  // Create AND(b, NOT(c)) - this will become andn
-  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
-  // Create final AND(XOR(Constant, a), AND(b, NOT(c)))
-  return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
-}
 
 /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
 static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
@@ -51891,11 +51834,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG))
     return R;
 
-  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
-  // This allows the andn operation to be done in parallel with the xor
-  if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget))
-    return R;
-
   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.
   // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
index 5a4d931d29896..923d065962081 100644
--- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -52,8 +52,8 @@ define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT:    andq %rsi, %rdi
-; CHECK-NEXT:    andnq %rdi, %rdx, %rax
+; CHECK-NEXT:    andnq %rsi, %rdx, %rax
+; CHECK-NEXT:    andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %xor, %b

>From 4a2e54661ca9758d6277e63992b80b065ff64588 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 19:30:44 -0400
Subject: [PATCH 3/9] changed combiner logic to prevent infinite loops

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4241019d47ec1..9b30f7a672c7f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7498,6 +7498,69 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
+  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+  // This allows the andn operation to be done in parallel with the xor
+  if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
+    // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
+    // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
+    
+    // Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND
+    SDValue AndOp, NotOp;
+    if (N0.getOpcode() == ISD::AND && 
+        N1.getOpcode() == ISD::XOR && 
+        DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+        isAllOnesConstant(N1.getOperand(1))) {
+      AndOp = N0;
+      NotOp = N1;
+    } else if (N1.getOpcode() == ISD::AND &&
+               N0.getOpcode() == ISD::XOR && 
+               DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+               isAllOnesConstant(N0.getOperand(1))) {
+      AndOp = N1;
+      NotOp = N0;
+    } else {
+      goto skip_optimization;
+    }
+    
+    // Prevent infinite loops: only apply if the AND node has one use
+    if (!AndOp.hasOneUse())
+      goto skip_optimization;
+    
+    SDValue AndOp0 = AndOp.getOperand(0);
+    SDValue AndOp1 = AndOp.getOperand(1);
+    
+    // Check if one operand of AndOp is XOR(Constant, a)
+    SDValue XorOp, OtherOp;
+    if (AndOp0.getOpcode() == ISD::XOR) {
+      XorOp = AndOp0;
+      OtherOp = AndOp1;
+    } else if (AndOp1.getOpcode() == ISD::XOR) {
+      XorOp = AndOp1;
+      OtherOp = AndOp0;
+    } else {
+      goto skip_optimization;
+    }
+    
+    // Check if XOR has a constant operand (and not all-ones constant to avoid NOT)
+    if ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
+         !isAllOnesConstant(XorOp.getOperand(0))) ||
+        (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
+         !isAllOnesConstant(XorOp.getOperand(1)))) {
+      // Prevent infinite loops: only apply if OtherOp is not also a NOT
+      if (OtherOp.getOpcode() == ISD::XOR && 
+          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
+          isAllOnesConstant(OtherOp.getOperand(1))) {
+        goto skip_optimization;
+      }
+      // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+      // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+      // This allows the andn (b & ~c) to be done in parallel with the xor
+      SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+      return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+    }
+  }
+skip_optimization:
+
   // reassociate and
   if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
     return RAND;

>From 7fb0e39bcb733850a671a8c922719cda80d05e31 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 20:13:32 -0400
Subject: [PATCH 4/9] refactored to remove goto

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 70 +++++++++----------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9b30f7a672c7f..e92e1319bb0f8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7519,47 +7519,45 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       AndOp = N1;
       NotOp = N0;
     } else {
-      goto skip_optimization;
+      // Pattern doesn't match, continue to next optimization
     }
     
-    // Prevent infinite loops: only apply if the AND node has one use
-    if (!AndOp.hasOneUse())
-      goto skip_optimization;
-    
-    SDValue AndOp0 = AndOp.getOperand(0);
-    SDValue AndOp1 = AndOp.getOperand(1);
-    
-    // Check if one operand of AndOp is XOR(Constant, a)
-    SDValue XorOp, OtherOp;
-    if (AndOp0.getOpcode() == ISD::XOR) {
-      XorOp = AndOp0;
-      OtherOp = AndOp1;
-    } else if (AndOp1.getOpcode() == ISD::XOR) {
-      XorOp = AndOp1;
-      OtherOp = AndOp0;
-    } else {
-      goto skip_optimization;
-    }
-    
-    // Check if XOR has a constant operand (and not all-ones constant to avoid NOT)
-    if ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
-         !isAllOnesConstant(XorOp.getOperand(0))) ||
-        (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
-         !isAllOnesConstant(XorOp.getOperand(1)))) {
-      // Prevent infinite loops: only apply if OtherOp is not also a NOT
-      if (OtherOp.getOpcode() == ISD::XOR && 
-          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
-          isAllOnesConstant(OtherOp.getOperand(1))) {
-        goto skip_optimization;
+    // If we found a valid pattern, check if the AND node has one use
+    if (AndOp && NotOp && AndOp.hasOneUse()) {
+      SDValue AndOp0 = AndOp.getOperand(0);
+      SDValue AndOp1 = AndOp.getOperand(1);
+      
+      // Check if one operand of AndOp is XOR(Constant, a)
+      SDValue XorOp, OtherOp;
+      if (AndOp0.getOpcode() == ISD::XOR) {
+        XorOp = AndOp0;
+        OtherOp = AndOp1;
+      } else if (AndOp1.getOpcode() == ISD::XOR) {
+        XorOp = AndOp1;
+        OtherOp = AndOp0;
+      } else {
+        // No XOR found in AND operands, continue to next optimization
+      }
+      
+      // If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT)
+      if (XorOp && OtherOp &&
+          ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
+            !isAllOnesConstant(XorOp.getOperand(0))) ||
+           (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
+            !isAllOnesConstant(XorOp.getOperand(1))))) {
+        // Prevent infinite loops: only apply if OtherOp is not also a NOT
+        if (!(OtherOp.getOpcode() == ISD::XOR && 
+              DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
+              isAllOnesConstant(OtherOp.getOperand(1)))) {
+          // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+          // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+          // This allows the andn (b & ~c) to be done in parallel with the xor
+          SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+          return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+        }
       }
-      // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
-      // To: AND(XOR(Constant, a), AND(b, NOT(c)))
-      // This allows the andn (b & ~c) to be done in parallel with the xor
-      SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
-      return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
     }
   }
-skip_optimization:
 
   // reassociate and
   if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))

>From d9c1a7568fb75a69c7dcc7f2fef25d637e0b4f00 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Fri, 10 Oct 2025 00:26:23 -0400
Subject: [PATCH 5/9] addressed comments

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 79 ++++++++-----------
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  1 -
 .../CodeGen/X86/constant-xor-and-andnot.ll    | 35 +++++---
 3 files changed, 57 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e92e1319bb0f8..45ddb02a96d92 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7501,60 +7501,43 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
   // This allows the andn operation to be done in parallel with the xor
   if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
-    // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
-    // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
-    
-    // Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND
-    SDValue AndOp, NotOp;
-    if (N0.getOpcode() == ISD::AND && 
-        N1.getOpcode() == ISD::XOR && 
-        DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
-        isAllOnesConstant(N1.getOperand(1))) {
-      AndOp = N0;
-      NotOp = N1;
-    } else if (N1.getOpcode() == ISD::AND &&
-               N0.getOpcode() == ISD::XOR && 
-               DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
-               isAllOnesConstant(N0.getOperand(1))) {
-      AndOp = N1;
-      NotOp = N0;
-    } else {
-      // Pattern doesn't match, continue to next optimization
-    }
+    SDValue InnerAndOp0, InnerAndOp1, NotArg;
     
-    // If we found a valid pattern, check if the AND node has one use
-    if (AndOp && NotOp && AndOp.hasOneUse()) {
-      SDValue AndOp0 = AndOp.getOperand(0);
-      SDValue AndOp1 = AndOp.getOperand(1);
+    // Match: AND(AND(Op0, Op1), NOT(NotArg))
+    // where NOT is represented as XOR with all-ones
+    // m_And automatically handles commutativity
+    if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0), 
+                                         m_Value(InnerAndOp1))),
+                          m_Xor(m_Value(NotArg), m_AllOnes())))) {
       
-      // Check if one operand of AndOp is XOR(Constant, a)
+      // Determine which operand is XOR(Constant, X) where Constant is not all-ones
       SDValue XorOp, OtherOp;
-      if (AndOp0.getOpcode() == ISD::XOR) {
-        XorOp = AndOp0;
-        OtherOp = AndOp1;
-      } else if (AndOp1.getOpcode() == ISD::XOR) {
-        XorOp = AndOp1;
-        OtherOp = AndOp0;
+      APInt XorConst;
+      
+      // Try first operand - m_Xor handles commutativity for XOR operands
+      if (sd_match(InnerAndOp0, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+          !XorConst.isAllOnes()) {
+        XorOp = InnerAndOp0;
+        OtherOp = InnerAndOp1;
+      } else if (sd_match(InnerAndOp1, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+                 !XorConst.isAllOnes()) {
+        XorOp = InnerAndOp1;
+        OtherOp = InnerAndOp0;
       } else {
-        // No XOR found in AND operands, continue to next optimization
+        // Pattern doesn't match - no XOR(Constant, X) found
+        XorOp = SDValue();
       }
       
-      // If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT)
-      if (XorOp && OtherOp &&
-          ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
-            !isAllOnesConstant(XorOp.getOperand(0))) ||
-           (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
-            !isAllOnesConstant(XorOp.getOperand(1))))) {
-        // Prevent infinite loops: only apply if OtherOp is not also a NOT
-        if (!(OtherOp.getOpcode() == ISD::XOR && 
-              DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
-              isAllOnesConstant(OtherOp.getOperand(1)))) {
-          // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
-          // To: AND(XOR(Constant, a), AND(b, NOT(c)))
-          // This allows the andn (b & ~c) to be done in parallel with the xor
-          SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
-          return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
-        }
+      // If we found the pattern, apply the transformation
+      // Prevent infinite loops by checking OtherOp is not also a NOT
+      if (XorOp && !sd_match(OtherOp, m_Xor(m_Value(), m_AllOnes()))) {
+        // Get the NOT node (either N0 or N1)
+        SDValue NotOp = sd_match(N0, m_Xor(m_Value(), m_AllOnes())) ? N0 : N1;
+        
+        // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+        // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+        SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+        return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
       }
     }
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d32cb680594c6..cdc97faf394ca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,7 +51541,6 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-
 /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
 static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
                                        SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
index 923d065962081..76056a413f904 100644
--- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=-bmi < %s | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefixes=CHECK,BMI
 
 ; Test the optimization described in issue #161630:
 ; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
@@ -7,9 +8,13 @@
 define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot:
 ; CHECK:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT:    andnq %rsi, %rdx, %rax
-; CHECK-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:      andnq %rsi, %rdx, %rax
+; BMI-NEXT:      andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %xor, %b
@@ -21,9 +26,13 @@ define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
 define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot_32:
 ; CHECK:       # %bb.0:
+; NOBMI-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    xorl $5678, %edi # imm = 0x162E
-; CHECK-NEXT:    andnl %esi, %edx, %eax
-; CHECK-NEXT:    andl %edi, %eax
+; NOBMI-NEXT:    andl %esi, %edi
+; NOBMI-NEXT:    notl %eax
+; NOBMI-NEXT:    andl %edi, %eax
+; BMI-NEXT:      andnl %esi, %edx, %eax
+; BMI-NEXT:      andl %edi, %eax
 ; CHECK-NEXT:    retq
   %xor = xor i32 %a, 5678
   %and1 = and i32 %xor, %b
@@ -36,9 +45,13 @@ define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
 define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
 ; CHECK:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT:    andnq %rsi, %rdx, %rax
-; CHECK-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:      andnq %rsi, %rdx, %rax
+; BMI-NEXT:      andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %b, %xor
@@ -51,9 +64,13 @@ define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
 define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
 ; CHECK:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT:    andnq %rsi, %rdx, %rax
-; CHECK-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:      andnq %rsi, %rdx, %rax
+; BMI-NEXT:      andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %xor, %b

>From 6fcb51fe8f0d9e275f92f142af7d663c5a267b4a Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Fri, 10 Oct 2025 02:46:22 -0400
Subject: [PATCH 6/9] only changed modified lines of code

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 ++++++++++---------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 36c1721ef7331..f00d458a2eff8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1268,12 +1268,12 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
   if (Opc == ISD::AND && TLI.hasAndNot(N1)) {
     // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
     // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
-    
+
     // Check if N1 is NOT(c) - i.e., XOR(c, -1)
-    if (N1.getOpcode() == ISD::XOR && 
+    if (N1.getOpcode() == ISD::XOR &&
         DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
         isAllOnesConstant(N1.getOperand(1))) {
-      
+
       // Check if one operand of N0 is XOR(Constant, a)
       SDValue XorOp, OtherOp;
       if (N00.getOpcode() == ISD::XOR) {
@@ -1285,7 +1285,7 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
       } else {
         return SDValue();
       }
-      
+
       // Check if XOR has a constant operand
       if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) ||
           DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) {
@@ -7557,24 +7557,26 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   // This allows the andn operation to be done in parallel with the xor
   if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
     SDValue InnerAndOp0, InnerAndOp1, NotArg;
-    
+
     // Match: AND(AND(Op0, Op1), NOT(NotArg))
     // where NOT is represented as XOR with all-ones
     // m_And automatically handles commutativity
-    if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0), 
+    if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0),
                                          m_Value(InnerAndOp1))),
                           m_Xor(m_Value(NotArg), m_AllOnes())))) {
-      
-      // Determine which operand is XOR(Constant, X) where Constant is not all-ones
+
+      // Determine which operand is XOR(Constant, X) where Constant is not
+      // all-ones
       SDValue XorOp, OtherOp;
       APInt XorConst;
-      
+
       // Try first operand - m_Xor handles commutativity for XOR operands
       if (sd_match(InnerAndOp0, m_Xor(m_ConstInt(XorConst), m_Value())) &&
           !XorConst.isAllOnes()) {
         XorOp = InnerAndOp0;
         OtherOp = InnerAndOp1;
-      } else if (sd_match(InnerAndOp1, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+      } else if (sd_match(InnerAndOp1,
+                          m_Xor(m_ConstInt(XorConst), m_Value())) &&
                  !XorConst.isAllOnes()) {
         XorOp = InnerAndOp1;
         OtherOp = InnerAndOp0;
@@ -7582,13 +7584,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
         // Pattern doesn't match - no XOR(Constant, X) found
         XorOp = SDValue();
       }
-      
+
       // If we found the pattern, apply the transformation
       // Prevent infinite loops by checking OtherOp is not also a NOT
       if (XorOp && !sd_match(OtherOp, m_Xor(m_Value(), m_AllOnes()))) {
         // Get the NOT node (either N0 or N1)
         SDValue NotOp = sd_match(N0, m_Xor(m_Value(), m_AllOnes())) ? N0 : N1;
-        
+
         // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
         // To: AND(XOR(Constant, a), AND(b, NOT(c)))
         SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);

>From 9022deb5a772f3e05bd309b24b23af53079e9402 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Fri, 10 Oct 2025 03:42:40 -0400
Subject: [PATCH 7/9] refactored to use sd match

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 48 ++++++++-----------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f00d458a2eff8..9cff8380b2a1e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1265,37 +1265,31 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
 
   // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
   // This allows the andn operation to be done in parallel with the xor
-  if (Opc == ISD::AND && TLI.hasAndNot(N1)) {
+  if (Opc == ISD::AND && (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) &&
+      sd_match(N1, m_Xor(m_Value(), m_AllOnes()))) {
     // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
     // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
 
-    // Check if N1 is NOT(c) - i.e., XOR(c, -1)
-    if (N1.getOpcode() == ISD::XOR &&
-        DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
-        isAllOnesConstant(N1.getOperand(1))) {
-
-      // Check if one operand of N0 is XOR(Constant, a)
-      SDValue XorOp, OtherOp;
-      if (N00.getOpcode() == ISD::XOR) {
-        XorOp = N00;
-        OtherOp = N01;
-      } else if (N01.getOpcode() == ISD::XOR) {
-        XorOp = N01;
-        OtherOp = N00;
-      } else {
-        return SDValue();
-      }
-
-      // Check if XOR has a constant operand
-      if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) ||
-          DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) {
-        // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
-        // To: AND(XOR(Constant, a), AND(b, NOT(c)))
-        // This allows the andn (b & ~c) to be done in parallel with the xor
-        SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
-        return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
-      }
+    SDValue XorOp, OtherOp;
+    APInt XorConst;
+
+    // Check which operand of N0 is XOR(Constant, X)
+    if (sd_match(N00, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+        !XorConst.isAllOnes()) {
+      XorOp = N00;
+      OtherOp = N01;
+    } else if (sd_match(N01, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+               !XorConst.isAllOnes()) {
+      XorOp = N01;
+      OtherOp = N00;
+    } else {
+      return SDValue();
     }
+
+    // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+    // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+    SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
+    return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
   }
   if (Opc == ISD::XOR) {
     // (N00 ^ N01) ^ N00 --> N01

>From 7e6ffa78246c809a2562f188b6eb420c0e1047ba Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Mon, 13 Oct 2025 02:08:45 -0400
Subject: [PATCH 8/9] addressed comments

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++--
 .../CodeGen/X86/constant-xor-and-andnot.ll    | 72 ++++++++++++-------
 2 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9cff8380b2a1e..af97a7dc76c66 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1273,16 +1273,14 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
     SDValue XorOp, OtherOp;
     APInt XorConst;
 
-    // Check which operand of N0 is XOR(Constant, X)
-    if (sd_match(N00, m_Xor(m_ConstInt(XorConst), m_Value())) &&
-        !XorConst.isAllOnes()) {
-      XorOp = N00;
-      OtherOp = N01;
-    } else if (sd_match(N01, m_Xor(m_ConstInt(XorConst), m_Value())) &&
-               !XorConst.isAllOnes()) {
-      XorOp = N01;
-      OtherOp = N00;
-    } else {
+    // Match AND(XOR(X, Constant), b) in either operand order
+    // Constants are canonicalized to RHS, so we can rely on that
+    // Use m_c_BinOp to handle commutativity of the AND
+    if (!sd_match(N0, m_c_BinOp(ISD::AND,
+                                m_AllOf(m_Xor(m_Value(), m_ConstInt(XorConst)),
+                                        m_Value(XorOp)),
+                                m_Value(OtherOp))) ||
+        XorConst.isAllOnes()) {
       return SDValue();
     }
 
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
index 76056a413f904..150f47423c705 100644
--- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -6,16 +6,21 @@
 ; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
 
 define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
-; CHECK-LABEL: test_constant_xor_and_andnot:
-; CHECK:       # %bb.0:
+; NOBMI-LABEL: test_constant_xor_and_andnot:
+; NOBMI:       # %bb.0:
 ; NOBMI-NEXT:    movq %rdx, %rax
-; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; NOBMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
 ; NOBMI-NEXT:    andq %rsi, %rdi
 ; NOBMI-NEXT:    notq %rax
 ; NOBMI-NEXT:    andq %rdi, %rax
-; BMI-NEXT:      andnq %rsi, %rdx, %rax
-; BMI-NEXT:      andq %rdi, %rax
-; CHECK-NEXT:    retq
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_constant_xor_and_andnot:
+; BMI:       # %bb.0:
+; BMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; BMI-NEXT:    andnq %rsi, %rdx, %rax
+; BMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %xor, %b
   %not_c = xor i64 %c, -1
@@ -24,16 +29,21 @@ define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
 }
 
 define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
-; CHECK-LABEL: test_constant_xor_and_andnot_32:
-; CHECK:       # %bb.0:
+; NOBMI-LABEL: test_constant_xor_and_andnot_32:
+; NOBMI:       # %bb.0:
 ; NOBMI-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    xorl $5678, %edi # imm = 0x162E
+; NOBMI-NEXT:    xorl $5678, %edi # imm = 0x162E
 ; NOBMI-NEXT:    andl %esi, %edi
 ; NOBMI-NEXT:    notl %eax
 ; NOBMI-NEXT:    andl %edi, %eax
-; BMI-NEXT:      andnl %esi, %edx, %eax
-; BMI-NEXT:      andl %edi, %eax
-; CHECK-NEXT:    retq
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_constant_xor_and_andnot_32:
+; BMI:       # %bb.0:
+; BMI-NEXT:    xorl $5678, %edi # imm = 0x162E
+; BMI-NEXT:    andnl %esi, %edx, %eax
+; BMI-NEXT:    andl %edi, %eax
+; BMI-NEXT:    retq
   %xor = xor i32 %a, 5678
   %and1 = and i32 %xor, %b
   %not_c = xor i32 %c, -1
@@ -43,16 +53,21 @@ define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
 
 ; Test with different operand order
 define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
-; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
-; CHECK:       # %bb.0:
+; NOBMI-LABEL: test_constant_xor_and_andnot_swapped:
+; NOBMI:       # %bb.0:
 ; NOBMI-NEXT:    movq %rdx, %rax
-; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; NOBMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
 ; NOBMI-NEXT:    andq %rsi, %rdi
 ; NOBMI-NEXT:    notq %rax
 ; NOBMI-NEXT:    andq %rdi, %rax
-; BMI-NEXT:      andnq %rsi, %rdx, %rax
-; BMI-NEXT:      andq %rdi, %rax
-; CHECK-NEXT:    retq
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_constant_xor_and_andnot_swapped:
+; BMI:       # %bb.0:
+; BMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; BMI-NEXT:    andnq %rsi, %rdx, %rax
+; BMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %b, %xor
   %not_c = xor i64 %c, -1
@@ -62,19 +77,26 @@ define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
 
 ; Test with different operand order for the final AND
 define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
-; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
-; CHECK:       # %bb.0:
+; NOBMI-LABEL: test_constant_xor_and_andnot_final_swapped:
+; NOBMI:       # %bb.0:
 ; NOBMI-NEXT:    movq %rdx, %rax
-; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; NOBMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
 ; NOBMI-NEXT:    andq %rsi, %rdi
 ; NOBMI-NEXT:    notq %rax
 ; NOBMI-NEXT:    andq %rdi, %rax
-; BMI-NEXT:      andnq %rsi, %rdx, %rax
-; BMI-NEXT:      andq %rdi, %rax
-; CHECK-NEXT:    retq
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_constant_xor_and_andnot_final_swapped:
+; BMI:       # %bb.0:
+; BMI-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; BMI-NEXT:    andnq %rsi, %rdx, %rax
+; BMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %xor, %b
   %not_c = xor i64 %c, -1
   %result = and i64 %not_c, %and1
   ret i64 %result
-}
\ No newline at end of file
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From de88c96008de1b326a6a9c0d60d06d6d6f532c21 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Tue, 14 Oct 2025 23:04:48 -0400
Subject: [PATCH 9/9] made use case more generalizable to all similar patterns

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 40 ++++++++-----------
 ...-xor-and-andnot.ll => andn-reassociate.ll} | 26 +++++++++++-
 2 files changed, 41 insertions(+), 25 deletions(-)
 rename llvm/test/CodeGen/X86/{constant-xor-and-andnot.ll => andn-reassociate.ll} (81%)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index af97a7dc76c66..b414270641443 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1263,31 +1263,25 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
       return N0;
   }
 
-  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
-  // This allows the andn operation to be done in parallel with the xor
+  // Reassociate (X & b) & ~c -> X & (b & ~c) when the target has ANDN and the
+  // inner AND has one use; the ANDN can then run in parallel with computing X.
   if (Opc == ISD::AND && (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) &&
-      sd_match(N1, m_Xor(m_Value(), m_AllOnes()))) {
-    // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
-    // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
-
-    SDValue XorOp, OtherOp;
-    APInt XorConst;
-
-    // Match AND(XOR(X, Constant), b) in either operand order
-    // Constants are canonicalized to RHS, so we can rely on that
-    // Use m_c_BinOp to handle commutativity of the AND
-    if (!sd_match(N0, m_c_BinOp(ISD::AND,
-                                m_AllOf(m_Xor(m_Value(), m_ConstInt(XorConst)),
-                                        m_Value(XorOp)),
-                                m_Value(OtherOp))) ||
-        XorConst.isAllOnes()) {
-      return SDValue();
-    }
+      sd_match(N1, m_Not(m_Value()))) {
+    // Look for pattern: AND(AND(X, b), NOT(c))
+    // Transform to: AND(X, AND(b, NOT(c)))
 
-    // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
-    // To: AND(XOR(Constant, a), AND(b, NOT(c)))
-    SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
-    return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+    SDValue X, B;
+
+    // Match AND(X, b) - check that N0 is an AND with one use
+    if (N0.getOpcode() == ISD::AND && N0->hasOneUse()) {
+      X = N00;
+      B = N01;
+
+      // Transform: AND(AND(X, b), NOT(c))
+      // To: AND(X, AND(b, NOT(c)))
+      SDValue AndBC = DAG.getNode(ISD::AND, DL, VT, B, N1);
+      return DAG.getNode(ISD::AND, DL, VT, X, AndBC);
+    }
   }
   if (Opc == ISD::XOR) {
     // (N00 ^ N01) ^ N00 --> N01
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/andn-reassociate.ll
similarity index 81%
rename from llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
rename to llvm/test/CodeGen/X86/andn-reassociate.ll
index 150f47423c705..720702ddc1045 100644
--- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
+++ b/llvm/test/CodeGen/X86/andn-reassociate.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=-bmi < %s | FileCheck %s --check-prefixes=CHECK,NOBMI
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefixes=CHECK,BMI
 
-; Test the optimization described in issue #161630:
-; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
+; Test the optimization: X & b & ~c -> X & (b & ~c)
+; This reassociation allows ANDN to execute in parallel with computing X
 
 define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
 ; NOBMI-LABEL: test_constant_xor_and_andnot:
@@ -98,5 +98,27 @@ define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
   %result = and i64 %not_c, %and1
   ret i64 %result
 }
+
+define i64 @test_add_and_andnot(i64 %a, i64 %b, i64 %c) {
+; NOBMI-LABEL: test_add_and_andnot:
+; NOBMI:       # %bb.0:
+; NOBMI-NEXT:    leaq 5678(%rdi), %rax
+; NOBMI-NEXT:    andq %rsi, %rax
+; NOBMI-NEXT:    notq %rdx
+; NOBMI-NEXT:    andq %rdx, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: test_add_and_andnot:
+; BMI:       # %bb.0:
+; BMI-NEXT:    leaq 5678(%rdi), %rcx
+; BMI-NEXT:    andnq %rsi, %rdx, %rax
+; BMI-NEXT:    andq %rcx, %rax
+; BMI-NEXT:    retq
+  %add = add i64 %a, 5678
+  %and1 = and i64 %add, %b
+  %not_c = xor i64 %c, -1
+  %result = and i64 %and1, %not_c
+  ret i64 %result
+}
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}



More information about the llvm-commits mailing list