[llvm] Optimized Constant Xor And And Not Operation (PR #161784)

Thu Oct 9 21:26:54 PDT 2025

https://github.com/manik-muk updated https://github.com/llvm/llvm-project/pull/161784

>From af5dcb54f7888cbc07183f6be852f29e321afc6b Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Fri, 3 Oct 2025 01:03:52 -0400
Subject: [PATCH 1/5] added optimization and tests

---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 63 +++++++++++++++++++
 .../CodeGen/X86/constant-xor-and-andnot.ll    | 63 +++++++++++++++++++
 llvm/test/CodeGen/X86/pr108731.ll             | 12 ++--
 3 files changed, 132 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/constant-xor-and-andnot.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cdc97faf394ca..6c562ccf5b363 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,6 +51541,64 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+/// This allows the andn operation to be done in parallel with the xor
+static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL,
+                                           SelectionDAG &DAG,
+                                           const X86Subtarget &Subtarget) {
+  using namespace llvm::SDPatternMatch;
+
+  EVT VT = N->getValueType(0);
+  // Only handle scalar integer types that support BMI instructions
+  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Check if N0 is AND(XOR(Constant, a), b)
+  if (N0.getOpcode() != ISD::AND)
+    return SDValue();
+
+  SDValue AndLHS = N0.getOperand(0);
+  SDValue AndRHS = N0.getOperand(1);
+
+  // Check if one operand is XOR(Constant, a)
+  SDValue XorOp, OtherOp;
+  if (AndLHS.getOpcode() == ISD::XOR) {
+    XorOp = AndLHS;
+    OtherOp = AndRHS;
+  } else if (AndRHS.getOpcode() == ISD::XOR) {
+    XorOp = AndRHS;
+    OtherOp = AndLHS;
+  } else {
+    return SDValue();
+  }
+
+  // Check if XOR has a constant operand
+  if (!isa<ConstantSDNode>(XorOp.getOperand(0)) &&
+      !isa<ConstantSDNode>(XorOp.getOperand(1))) {
+    return SDValue();
+  }
+
+  // Check if N1 is NOT(c) - i.e., XOR(c, -1)
+  SDValue NotOp;
+  if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) {
+    NotOp = N1.getOperand(0);
+  } else {
+    return SDValue();
+  }
+
+  // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+  // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+  // This allows the andn (b & ~c) to be done in parallel with the xor
+
+  // Create AND(b, NOT(c)) - this will become andn
+  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
+  // Create final AND(XOR(Constant, a), AND(b, NOT(c)))
+  return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+}
+
 /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
 static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
                                        SelectionDAG &DAG,
@@ -51833,6 +51891,11 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG))
     return R;
 
+  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+  // This allows the andn operation to be done in parallel with the xor
+  if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget))
+    return R;
+
   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.
   // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
new file mode 100644
index 0000000000000..5a4d931d29896
--- /dev/null
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s
+
+; Test the optimization described in issue #161630:
+; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
+
+define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT:    andnq %rsi, %rdx, %rax
+; CHECK-NEXT:    andq %rdi, %rax
+; CHECK-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %xor, %b
+  %not_c = xor i64 %c, -1
+  %result = and i64 %and1, %not_c
+  ret i64 %result
+}
+
+define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl $5678, %edi # imm = 0x162E
+; CHECK-NEXT:    andnl %esi, %edx, %eax
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    retq
+  %xor = xor i32 %a, 5678
+  %and1 = and i32 %xor, %b
+  %not_c = xor i32 %c, -1
+  %result = and i32 %and1, %not_c
+  ret i32 %result
+}
+
+; Test with different operand order
+define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT:    andnq %rsi, %rdx, %rax
+; CHECK-NEXT:    andq %rdi, %rax
+; CHECK-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %b, %xor
+  %not_c = xor i64 %c, -1
+  %result = and i64 %and1, %not_c
+  ret i64 %result
+}
+
+; Test with different operand order for the final AND
+define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT:    andq %rsi, %rdi
+; CHECK-NEXT:    andnq %rdi, %rdx, %rax
+; CHECK-NEXT:    retq
+  %xor = xor i64 %a, 1234
+  %and1 = and i64 %xor, %b
+  %not_c = xor i64 %c, -1
+  %result = and i64 %not_c, %and1
+  ret i64 %result
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
index 2983d108eaedd..bda90117a1be4 100644
--- a/llvm/test/CodeGen/X86/pr108731.ll
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) {
 ; BMI-LABEL: test_i64:
 ; BMI:       # %bb.0: # %Entry
 ; BMI-NEXT:    andq %rdx, %rsi
-; BMI-NEXT:    andnq %rdi, %rsi, %rax
-; BMI-NEXT:    andnq %rcx, %rdx, %rcx
-; BMI-NEXT:    andnq %rax, %rcx, %rax
+; BMI-NEXT:    andnq %rcx, %rdx, %rax
+; BMI-NEXT:    andnq %rdi, %rax, %rax
+; BMI-NEXT:    andnq %rax, %rsi, %rax
 ; BMI-NEXT:    retq
 Entry:
   %and1 = and i64 %y, %x
@@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) {
 ; BMI-LABEL: test_i32:
 ; BMI:       # %bb.0: # %Entry
 ; BMI-NEXT:    andl %edx, %esi
-; BMI-NEXT:    andnl %edi, %esi, %eax
-; BMI-NEXT:    andnl %ecx, %edx, %ecx
-; BMI-NEXT:    andnl %eax, %ecx, %eax
+; BMI-NEXT:    andnl %ecx, %edx, %eax
+; BMI-NEXT:    andnl %edi, %eax, %eax
+; BMI-NEXT:    andnl %eax, %esi, %eax
 ; BMI-NEXT:    retq
 Entry:
   %and1 = and i32 %y, %x

>From b176fd6e56d22c8f06190246ab5b5a2871776060 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 16:32:10 -0400
Subject: [PATCH 2/5] Move constant XOR AND ANDNOT optimization to generic DAG
 combiner

This moves the optimization from X86-specific code to the generic
reassociateOpsCommutative function in DAGCombiner.cpp. The optimization
transforms (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
to allow ANDNOT operations to be done in parallel with XOR operations.

This benefits all targets that have an and-not instruction (X86 BMI ANDN,
ARM BIC, RISC-V Zbb ANDN, etc.) rather than being limited to X86 only.

- Remove X86-specific combineConstantXorAndAndNot function
- Add generic optimization to reassociateOpsCommutative with TLI.hasAndNot check
- Update test expectations for the new optimized output
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 +++++++++++
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 62 -------------------
 .../CodeGen/X86/constant-xor-and-andnot.ll    |  4 +-
 3 files changed, 37 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1ef2b35952833..4241019d47ec1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1262,6 +1262,41 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
     if (N1 == N00 || N1 == N01)
       return N0;
   }
+
+  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+  // This allows the andn operation to be done in parallel with the xor
+  if (Opc == ISD::AND && TLI.hasAndNot(N1)) {
+    // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
+    // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
+    
+    // Check if N1 is NOT(c) - i.e., XOR(c, -1)
+    if (N1.getOpcode() == ISD::XOR && 
+        DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+        isAllOnesConstant(N1.getOperand(1))) {
+      
+      // Check if one operand of N0 is XOR(Constant, a)
+      SDValue XorOp, OtherOp;
+      if (N00.getOpcode() == ISD::XOR) {
+        XorOp = N00;
+        OtherOp = N01;
+      } else if (N01.getOpcode() == ISD::XOR) {
+        XorOp = N01;
+        OtherOp = N00;
+      } else {
+        return SDValue();
+      }
+      
+      // Check if XOR has a constant operand
+      if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) ||
+          DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) {
+        // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+        // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+        // This allows the andn (b & ~c) to be done in parallel with the xor
+        SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
+        return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+      }
+    }
+  }
   if (Opc == ISD::XOR) {
     // (N00 ^ N01) ^ N00 --> N01
     if (N1 == N00)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6c562ccf5b363..d32cb680594c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,63 +51541,6 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
-/// This allows the andn operation to be done in parallel with the xor
-static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL,
-                                           SelectionDAG &DAG,
-                                           const X86Subtarget &Subtarget) {
-  using namespace llvm::SDPatternMatch;
-
-  EVT VT = N->getValueType(0);
-  // Only handle scalar integer types that support BMI instructions
-  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
-    return SDValue();
-
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-
-  // Check if N0 is AND(XOR(Constant, a), b)
-  if (N0.getOpcode() != ISD::AND)
-    return SDValue();
-
-  SDValue AndLHS = N0.getOperand(0);
-  SDValue AndRHS = N0.getOperand(1);
-
-  // Check if one operand is XOR(Constant, a)
-  SDValue XorOp, OtherOp;
-  if (AndLHS.getOpcode() == ISD::XOR) {
-    XorOp = AndLHS;
-    OtherOp = AndRHS;
-  } else if (AndRHS.getOpcode() == ISD::XOR) {
-    XorOp = AndRHS;
-    OtherOp = AndLHS;
-  } else {
-    return SDValue();
-  }
-
-  // Check if XOR has a constant operand
-  if (!isa<ConstantSDNode>(XorOp.getOperand(0)) &&
-      !isa<ConstantSDNode>(XorOp.getOperand(1))) {
-    return SDValue();
-  }
-
-  // Check if N1 is NOT(c) - i.e., XOR(c, -1)
-  SDValue NotOp;
-  if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) {
-    NotOp = N1.getOperand(0);
-  } else {
-    return SDValue();
-  }
-
-  // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
-  // To: AND(XOR(Constant, a), AND(b, NOT(c)))
-  // This allows the andn (b & ~c) to be done in parallel with the xor
-
-  // Create AND(b, NOT(c)) - this will become andn
-  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
-  // Create final AND(XOR(Constant, a), AND(b, NOT(c)))
-  return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
-}
 
 /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
 static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
@@ -51891,11 +51834,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG))
     return R;
 
-  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
-  // This allows the andn operation to be done in parallel with the xor
-  if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget))
-    return R;
-
   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.
   // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
index 5a4d931d29896..923d065962081 100644
--- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -52,8 +52,8 @@ define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT:    andq %rsi, %rdi
-; CHECK-NEXT:    andnq %rdi, %rdx, %rax
+; CHECK-NEXT:    andnq %rsi, %rdx, %rax
+; CHECK-NEXT:    andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %xor, %b

>From 4a2e54661ca9758d6277e63992b80b065ff64588 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 19:30:44 -0400
Subject: [PATCH 3/5] changed combiner logic to account for infinite loops

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4241019d47ec1..9b30f7a672c7f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7498,6 +7498,69 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
+  // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+  // This allows the andn operation to be done in parallel with the xor
+  if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
+    // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
+    // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
+    
+    // Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND
+    SDValue AndOp, NotOp;
+    if (N0.getOpcode() == ISD::AND && 
+        N1.getOpcode() == ISD::XOR && 
+        DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+        isAllOnesConstant(N1.getOperand(1))) {
+      AndOp = N0;
+      NotOp = N1;
+    } else if (N1.getOpcode() == ISD::AND &&
+               N0.getOpcode() == ISD::XOR && 
+               DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+               isAllOnesConstant(N0.getOperand(1))) {
+      AndOp = N1;
+      NotOp = N0;
+    } else {
+      goto skip_optimization;
+    }
+    
+    // Prevent infinite loops: only apply if the AND node has one use
+    if (!AndOp.hasOneUse())
+      goto skip_optimization;
+    
+    SDValue AndOp0 = AndOp.getOperand(0);
+    SDValue AndOp1 = AndOp.getOperand(1);
+    
+    // Check if one operand of AndOp is XOR(Constant, a)
+    SDValue XorOp, OtherOp;
+    if (AndOp0.getOpcode() == ISD::XOR) {
+      XorOp = AndOp0;
+      OtherOp = AndOp1;
+    } else if (AndOp1.getOpcode() == ISD::XOR) {
+      XorOp = AndOp1;
+      OtherOp = AndOp0;
+    } else {
+      goto skip_optimization;
+    }
+    
+    // Check if XOR has a constant operand (and not all-ones constant to avoid NOT)
+    if ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
+         !isAllOnesConstant(XorOp.getOperand(0))) ||
+        (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
+         !isAllOnesConstant(XorOp.getOperand(1)))) {
+      // Prevent infinite loops: only apply if OtherOp is not also a NOT
+      if (OtherOp.getOpcode() == ISD::XOR && 
+          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
+          isAllOnesConstant(OtherOp.getOperand(1))) {
+        goto skip_optimization;
+      }
+      // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+      // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+      // This allows the andn (b & ~c) to be done in parallel with the xor
+      SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+      return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+    }
+  }
+skip_optimization:
+
   // reassociate and
   if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
     return RAND;

>From 7fb0e39bcb733850a671a8c922719cda80d05e31 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 20:13:32 -0400
Subject: [PATCH 4/5] refactored to remove goto

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 70 +++++++++----------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9b30f7a672c7f..e92e1319bb0f8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7519,47 +7519,45 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       AndOp = N1;
       NotOp = N0;
     } else {
-      goto skip_optimization;
+      // Pattern doesn't match, continue to next optimization
     }
     
-    // Prevent infinite loops: only apply if the AND node has one use
-    if (!AndOp.hasOneUse())
-      goto skip_optimization;
-    
-    SDValue AndOp0 = AndOp.getOperand(0);
-    SDValue AndOp1 = AndOp.getOperand(1);
-    
-    // Check if one operand of AndOp is XOR(Constant, a)
-    SDValue XorOp, OtherOp;
-    if (AndOp0.getOpcode() == ISD::XOR) {
-      XorOp = AndOp0;
-      OtherOp = AndOp1;
-    } else if (AndOp1.getOpcode() == ISD::XOR) {
-      XorOp = AndOp1;
-      OtherOp = AndOp0;
-    } else {
-      goto skip_optimization;
-    }
-    
-    // Check if XOR has a constant operand (and not all-ones constant to avoid NOT)
-    if ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
-         !isAllOnesConstant(XorOp.getOperand(0))) ||
-        (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
-         !isAllOnesConstant(XorOp.getOperand(1)))) {
-      // Prevent infinite loops: only apply if OtherOp is not also a NOT
-      if (OtherOp.getOpcode() == ISD::XOR && 
-          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
-          isAllOnesConstant(OtherOp.getOperand(1))) {
-        goto skip_optimization;
+    // If we found a valid pattern, check if the AND node has one use
+    if (AndOp && NotOp && AndOp.hasOneUse()) {
+      SDValue AndOp0 = AndOp.getOperand(0);
+      SDValue AndOp1 = AndOp.getOperand(1);
+      
+      // Check if one operand of AndOp is XOR(Constant, a)
+      SDValue XorOp, OtherOp;
+      if (AndOp0.getOpcode() == ISD::XOR) {
+        XorOp = AndOp0;
+        OtherOp = AndOp1;
+      } else if (AndOp1.getOpcode() == ISD::XOR) {
+        XorOp = AndOp1;
+        OtherOp = AndOp0;
+      } else {
+        // No XOR found in AND operands, continue to next optimization
+      }
+      
+      // If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT)
+      if (XorOp && OtherOp &&
+          ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
+            !isAllOnesConstant(XorOp.getOperand(0))) ||
+           (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
+            !isAllOnesConstant(XorOp.getOperand(1))))) {
+        // Prevent infinite loops: only apply if OtherOp is not also a NOT
+        if (!(OtherOp.getOpcode() == ISD::XOR && 
+              DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
+              isAllOnesConstant(OtherOp.getOperand(1)))) {
+          // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+          // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+          // This allows the andn (b & ~c) to be done in parallel with the xor
+          SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+          return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+        }
       }
-      // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
-      // To: AND(XOR(Constant, a), AND(b, NOT(c)))
-      // This allows the andn (b & ~c) to be done in parallel with the xor
-      SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
-      return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
     }
   }
-skip_optimization:
 
   // reassociate and
   if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))

>From d9c1a7568fb75a69c7dcc7f2fef25d637e0b4f00 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Fri, 10 Oct 2025 00:26:23 -0400
Subject: [PATCH 5/5] addressed comments

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 79 ++++++++-----------
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  1 -
 .../CodeGen/X86/constant-xor-and-andnot.ll    | 35 +++++---
 3 files changed, 57 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e92e1319bb0f8..45ddb02a96d92 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7501,60 +7501,43 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
   // This allows the andn operation to be done in parallel with the xor
   if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
-    // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
-    // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
-    
-    // Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND
-    SDValue AndOp, NotOp;
-    if (N0.getOpcode() == ISD::AND && 
-        N1.getOpcode() == ISD::XOR && 
-        DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
-        isAllOnesConstant(N1.getOperand(1))) {
-      AndOp = N0;
-      NotOp = N1;
-    } else if (N1.getOpcode() == ISD::AND &&
-               N0.getOpcode() == ISD::XOR && 
-               DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
-               isAllOnesConstant(N0.getOperand(1))) {
-      AndOp = N1;
-      NotOp = N0;
-    } else {
-      // Pattern doesn't match, continue to next optimization
-    }
+    SDValue InnerAndOp0, InnerAndOp1, NotArg;
     
-    // If we found a valid pattern, check if the AND node has one use
-    if (AndOp && NotOp && AndOp.hasOneUse()) {
-      SDValue AndOp0 = AndOp.getOperand(0);
-      SDValue AndOp1 = AndOp.getOperand(1);
+    // Match: AND(AND(Op0, Op1), NOT(NotArg))
+    // where NOT is represented as XOR with all-ones
+    // m_And automatically handles commutativity
+    if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0), 
+                                         m_Value(InnerAndOp1))),
+                          m_Xor(m_Value(NotArg), m_AllOnes())))) {
       
-      // Check if one operand of AndOp is XOR(Constant, a)
+      // Determine which operand is XOR(Constant, X) where Constant is not all-ones
       SDValue XorOp, OtherOp;
-      if (AndOp0.getOpcode() == ISD::XOR) {
-        XorOp = AndOp0;
-        OtherOp = AndOp1;
-      } else if (AndOp1.getOpcode() == ISD::XOR) {
-        XorOp = AndOp1;
-        OtherOp = AndOp0;
+      APInt XorConst;
+      
+      // Try first operand - m_Xor handles commutativity for XOR operands
+      if (sd_match(InnerAndOp0, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+          !XorConst.isAllOnes()) {
+        XorOp = InnerAndOp0;
+        OtherOp = InnerAndOp1;
+      } else if (sd_match(InnerAndOp1, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+                 !XorConst.isAllOnes()) {
+        XorOp = InnerAndOp1;
+        OtherOp = InnerAndOp0;
       } else {
-        // No XOR found in AND operands, continue to next optimization
+        // Pattern doesn't match - no XOR(Constant, X) found
+        XorOp = SDValue();
       }
       
-      // If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT)
-      if (XorOp && OtherOp &&
-          ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
-            !isAllOnesConstant(XorOp.getOperand(0))) ||
-           (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
-            !isAllOnesConstant(XorOp.getOperand(1))))) {
-        // Prevent infinite loops: only apply if OtherOp is not also a NOT
-        if (!(OtherOp.getOpcode() == ISD::XOR && 
-              DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
-              isAllOnesConstant(OtherOp.getOperand(1)))) {
-          // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
-          // To: AND(XOR(Constant, a), AND(b, NOT(c)))
-          // This allows the andn (b & ~c) to be done in parallel with the xor
-          SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
-          return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
-        }
+      // If we found the pattern, apply the transformation
+      // Prevent infinite loops by checking OtherOp is not also a NOT
+      if (XorOp && !sd_match(OtherOp, m_Xor(m_Value(), m_AllOnes()))) {
+        // Get the NOT node (either N0 or N1)
+        SDValue NotOp = sd_match(N0, m_Xor(m_Value(), m_AllOnes())) ? N0 : N1;
+        
+        // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+        // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+        SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+        return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
       }
     }
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d32cb680594c6..cdc97faf394ca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,7 +51541,6 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-
 /// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
 static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
                                        SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
index 923d065962081..76056a413f904 100644
--- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=-bmi < %s | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefixes=CHECK,BMI
 
 ; Test the optimization described in issue #161630:
 ; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
@@ -7,9 +8,13 @@
 define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot:
 ; CHECK:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT:    andnq %rsi, %rdx, %rax
-; CHECK-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:      andnq %rsi, %rdx, %rax
+; BMI-NEXT:      andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %xor, %b
@@ -21,9 +26,13 @@ define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
 define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot_32:
 ; CHECK:       # %bb.0:
+; NOBMI-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    xorl $5678, %edi # imm = 0x162E
-; CHECK-NEXT:    andnl %esi, %edx, %eax
-; CHECK-NEXT:    andl %edi, %eax
+; NOBMI-NEXT:    andl %esi, %edi
+; NOBMI-NEXT:    notl %eax
+; NOBMI-NEXT:    andl %edi, %eax
+; BMI-NEXT:      andnl %esi, %edx, %eax
+; BMI-NEXT:      andl %edi, %eax
 ; CHECK-NEXT:    retq
   %xor = xor i32 %a, 5678
   %and1 = and i32 %xor, %b
@@ -36,9 +45,13 @@ define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
 define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
 ; CHECK:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT:    andnq %rsi, %rdx, %rax
-; CHECK-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:      andnq %rsi, %rdx, %rax
+; BMI-NEXT:      andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %b, %xor
@@ -51,9 +64,13 @@ define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
 define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
 ; CHECK:       # %bb.0:
+; NOBMI-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT:    andnq %rsi, %rdx, %rax
-; CHECK-NEXT:    andq %rdi, %rax
+; NOBMI-NEXT:    andq %rsi, %rdi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    andq %rdi, %rax
+; BMI-NEXT:      andnq %rsi, %rdx, %rax
+; BMI-NEXT:      andq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xor = xor i64 %a, 1234
   %and1 = and i64 %xor, %b