[llvm] Optimized Constant XOR + AND + ANDNOT Operation (PR #161784)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 9 21:26:54 PDT 2025
https://github.com/manik-muk updated https://github.com/llvm/llvm-project/pull/161784
>From af5dcb54f7888cbc07183f6be852f29e321afc6b Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Fri, 3 Oct 2025 01:03:52 -0400
Subject: [PATCH 1/5] added optimization and tests
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 63 +++++++++++++++++++
.../CodeGen/X86/constant-xor-and-andnot.ll | 63 +++++++++++++++++++
llvm/test/CodeGen/X86/pr108731.ll | 12 ++--
3 files changed, 132 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cdc97faf394ca..6c562ccf5b363 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,6 +51541,64 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+/// This allows the andn operation to be done in parallel with the xor
+static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ using namespace llvm::SDPatternMatch;
+
+ EVT VT = N->getValueType(0);
+ // Only handle scalar integer types that support BMI instructions
+ if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Check if N0 is AND(XOR(Constant, a), b)
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ SDValue AndLHS = N0.getOperand(0);
+ SDValue AndRHS = N0.getOperand(1);
+
+ // Check if one operand is XOR(Constant, a)
+ SDValue XorOp, OtherOp;
+ if (AndLHS.getOpcode() == ISD::XOR) {
+ XorOp = AndLHS;
+ OtherOp = AndRHS;
+ } else if (AndRHS.getOpcode() == ISD::XOR) {
+ XorOp = AndRHS;
+ OtherOp = AndLHS;
+ } else {
+ return SDValue();
+ }
+
+ // Check if XOR has a constant operand
+ if (!isa<ConstantSDNode>(XorOp.getOperand(0)) &&
+ !isa<ConstantSDNode>(XorOp.getOperand(1))) {
+ return SDValue();
+ }
+
+ // Check if N1 is NOT(c) - i.e., XOR(c, -1)
+ SDValue NotOp;
+ if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) {
+ NotOp = N1.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+ // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+ // This allows the andn (b & ~c) to be done in parallel with the xor
+
+ // Create AND(b, NOT(c)) - this will become andn
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
+ // Create final AND(XOR(Constant, a), AND(b, NOT(c)))
+ return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+}
+
/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
SelectionDAG &DAG,
@@ -51833,6 +51891,11 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG))
return R;
+ // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+ // This allows the andn operation to be done in parallel with the xor
+ if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget))
+ return R;
+
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
new file mode 100644
index 0000000000000..5a4d931d29896
--- /dev/null
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s
+
+; Test the optimization described in issue #161630:
+; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
+
+define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT: andnq %rsi, %rdx, %rax
+; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: retq
+ %xor = xor i64 %a, 1234
+ %and1 = and i64 %xor, %b
+ %not_c = xor i64 %c, -1
+ %result = and i64 %and1, %not_c
+ ret i64 %result
+}
+
+define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl $5678, %edi # imm = 0x162E
+; CHECK-NEXT: andnl %esi, %edx, %eax
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+ %xor = xor i32 %a, 5678
+ %and1 = and i32 %xor, %b
+ %not_c = xor i32 %c, -1
+ %result = and i32 %and1, %not_c
+ ret i32 %result
+}
+
+; Test with different operand order
+define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT: andnq %rsi, %rdx, %rax
+; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: retq
+ %xor = xor i64 %a, 1234
+ %and1 = and i64 %b, %xor
+ %not_c = xor i64 %c, -1
+ %result = and i64 %and1, %not_c
+ ret i64 %result
+}
+
+; Test with different operand order for the final AND
+define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
+; CHECK-NEXT: andq %rsi, %rdi
+; CHECK-NEXT: andnq %rdi, %rdx, %rax
+; CHECK-NEXT: retq
+ %xor = xor i64 %a, 1234
+ %and1 = and i64 %xor, %b
+ %not_c = xor i64 %c, -1
+ %result = and i64 %not_c, %and1
+ ret i64 %result
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
index 2983d108eaedd..bda90117a1be4 100644
--- a/llvm/test/CodeGen/X86/pr108731.ll
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -17,9 +17,9 @@ define i64 @test_i64(i64 %w, i64 %x, i64 %y, i64 %z) {
; BMI-LABEL: test_i64:
; BMI: # %bb.0: # %Entry
; BMI-NEXT: andq %rdx, %rsi
-; BMI-NEXT: andnq %rdi, %rsi, %rax
-; BMI-NEXT: andnq %rcx, %rdx, %rcx
-; BMI-NEXT: andnq %rax, %rcx, %rax
+; BMI-NEXT: andnq %rcx, %rdx, %rax
+; BMI-NEXT: andnq %rdi, %rax, %rax
+; BMI-NEXT: andnq %rax, %rsi, %rax
; BMI-NEXT: retq
Entry:
%and1 = and i64 %y, %x
@@ -46,9 +46,9 @@ define i32 @test_i32(i32 %w, i32 %x, i32 %y, i32 %z) {
; BMI-LABEL: test_i32:
; BMI: # %bb.0: # %Entry
; BMI-NEXT: andl %edx, %esi
-; BMI-NEXT: andnl %edi, %esi, %eax
-; BMI-NEXT: andnl %ecx, %edx, %ecx
-; BMI-NEXT: andnl %eax, %ecx, %eax
+; BMI-NEXT: andnl %ecx, %edx, %eax
+; BMI-NEXT: andnl %edi, %eax, %eax
+; BMI-NEXT: andnl %eax, %esi, %eax
; BMI-NEXT: retq
Entry:
%and1 = and i32 %y, %x
>From b176fd6e56d22c8f06190246ab5b5a2871776060 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 16:32:10 -0400
Subject: [PATCH 2/5] Move constant XOR AND ANDNOT optimization to generic DAG
combiner
This moves the optimization from X86-specific code to the generic
reassociateOpsCommutative function in DAGCombiner.cpp. The optimization
transforms (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
to allow ANDNOT operations to be done in parallel with XOR operations.
This benefits all targets that have ANDNOT instructions (X86 BMI, ARM BIC,
RISC-V, etc.), rather than X86 alone.
- Remove X86-specific combineConstantXorAndAndNot function
- Add generic optimization to reassociateOpsCommutative with TLI.hasAndNot check
- Update test expectations for the new optimized output
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 +++++++++++
llvm/lib/Target/X86/X86ISelLowering.cpp | 62 -------------------
.../CodeGen/X86/constant-xor-and-andnot.ll | 4 +-
3 files changed, 37 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1ef2b35952833..4241019d47ec1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1262,6 +1262,41 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N1 == N00 || N1 == N01)
return N0;
}
+
+ // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+ // This allows the andn operation to be done in parallel with the xor
+ if (Opc == ISD::AND && TLI.hasAndNot(N1)) {
+ // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
+ // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
+
+ // Check if N1 is NOT(c) - i.e., XOR(c, -1)
+ if (N1.getOpcode() == ISD::XOR &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+ isAllOnesConstant(N1.getOperand(1))) {
+
+ // Check if one operand of N0 is XOR(Constant, a)
+ SDValue XorOp, OtherOp;
+ if (N00.getOpcode() == ISD::XOR) {
+ XorOp = N00;
+ OtherOp = N01;
+ } else if (N01.getOpcode() == ISD::XOR) {
+ XorOp = N01;
+ OtherOp = N00;
+ } else {
+ return SDValue();
+ }
+
+ // Check if XOR has a constant operand
+ if (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) ||
+ DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1))) {
+ // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+ // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+ // This allows the andn (b & ~c) to be done in parallel with the xor
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
+ return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+ }
+ }
+ }
if (Opc == ISD::XOR) {
// (N00 ^ N01) ^ N00 --> N01
if (N1 == N00)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6c562ccf5b363..d32cb680594c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,63 +51541,6 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
-/// This allows the andn operation to be done in parallel with the xor
-static SDValue combineConstantXorAndAndNot(SDNode *N, const SDLoc &DL,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- using namespace llvm::SDPatternMatch;
-
- EVT VT = N->getValueType(0);
- // Only handle scalar integer types that support BMI instructions
- if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
- return SDValue();
-
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
-
- // Check if N0 is AND(XOR(Constant, a), b)
- if (N0.getOpcode() != ISD::AND)
- return SDValue();
-
- SDValue AndLHS = N0.getOperand(0);
- SDValue AndRHS = N0.getOperand(1);
-
- // Check if one operand is XOR(Constant, a)
- SDValue XorOp, OtherOp;
- if (AndLHS.getOpcode() == ISD::XOR) {
- XorOp = AndLHS;
- OtherOp = AndRHS;
- } else if (AndRHS.getOpcode() == ISD::XOR) {
- XorOp = AndRHS;
- OtherOp = AndLHS;
- } else {
- return SDValue();
- }
-
- // Check if XOR has a constant operand
- if (!isa<ConstantSDNode>(XorOp.getOperand(0)) &&
- !isa<ConstantSDNode>(XorOp.getOperand(1))) {
- return SDValue();
- }
-
- // Check if N1 is NOT(c) - i.e., XOR(c, -1)
- SDValue NotOp;
- if (N1.getOpcode() == ISD::XOR && isAllOnesConstant(N1.getOperand(1))) {
- NotOp = N1.getOperand(0);
- } else {
- return SDValue();
- }
-
- // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
- // To: AND(XOR(Constant, a), AND(b, NOT(c)))
- // This allows the andn (b & ~c) to be done in parallel with the xor
-
- // Create AND(b, NOT(c)) - this will become andn
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, N1);
- // Create final AND(XOR(Constant, a), AND(b, NOT(c)))
- return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
-}
/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
@@ -51891,11 +51834,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG))
return R;
- // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
- // This allows the andn operation to be done in parallel with the xor
- if (SDValue R = combineConstantXorAndAndNot(N, dl, DAG, Subtarget))
- return R;
-
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
index 5a4d931d29896..923d065962081 100644
--- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -52,8 +52,8 @@ define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
; CHECK: # %bb.0:
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT: andq %rsi, %rdi
-; CHECK-NEXT: andnq %rdi, %rdx, %rax
+; CHECK-NEXT: andnq %rsi, %rdx, %rax
+; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
>From 4a2e54661ca9758d6277e63992b80b065ff64588 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 19:30:44 -0400
Subject: [PATCH 3/5] changed combiner logic to account for infinite loops
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 63 +++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4241019d47ec1..9b30f7a672c7f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7498,6 +7498,69 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ // Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
+ // This allows the andn operation to be done in parallel with the xor
+ if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
+ // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
+ // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
+
+ // Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND
+ SDValue AndOp, NotOp;
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::XOR &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
+ isAllOnesConstant(N1.getOperand(1))) {
+ AndOp = N0;
+ NotOp = N1;
+ } else if (N1.getOpcode() == ISD::AND &&
+ N0.getOpcode() == ISD::XOR &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isAllOnesConstant(N0.getOperand(1))) {
+ AndOp = N1;
+ NotOp = N0;
+ } else {
+ goto skip_optimization;
+ }
+
+ // Prevent infinite loops: only apply if the AND node has one use
+ if (!AndOp.hasOneUse())
+ goto skip_optimization;
+
+ SDValue AndOp0 = AndOp.getOperand(0);
+ SDValue AndOp1 = AndOp.getOperand(1);
+
+ // Check if one operand of AndOp is XOR(Constant, a)
+ SDValue XorOp, OtherOp;
+ if (AndOp0.getOpcode() == ISD::XOR) {
+ XorOp = AndOp0;
+ OtherOp = AndOp1;
+ } else if (AndOp1.getOpcode() == ISD::XOR) {
+ XorOp = AndOp1;
+ OtherOp = AndOp0;
+ } else {
+ goto skip_optimization;
+ }
+
+ // Check if XOR has a constant operand (and not all-ones constant to avoid NOT)
+ if ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
+ !isAllOnesConstant(XorOp.getOperand(0))) ||
+ (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
+ !isAllOnesConstant(XorOp.getOperand(1)))) {
+ // Prevent infinite loops: only apply if OtherOp is not also a NOT
+ if (OtherOp.getOpcode() == ISD::XOR &&
+ DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
+ isAllOnesConstant(OtherOp.getOperand(1))) {
+ goto skip_optimization;
+ }
+ // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+ // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+ // This allows the andn (b & ~c) to be done in parallel with the xor
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+ return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+ }
+ }
+skip_optimization:
+
// reassociate and
if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
return RAND;
>From 7fb0e39bcb733850a671a8c922719cda80d05e31 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Sat, 4 Oct 2025 20:13:32 -0400
Subject: [PATCH 4/5] refactored to remove goto
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 70 +++++++++----------
1 file changed, 34 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9b30f7a672c7f..e92e1319bb0f8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7519,47 +7519,45 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
AndOp = N1;
NotOp = N0;
} else {
- goto skip_optimization;
+ // Pattern doesn't match, continue to next optimization
}
- // Prevent infinite loops: only apply if the AND node has one use
- if (!AndOp.hasOneUse())
- goto skip_optimization;
-
- SDValue AndOp0 = AndOp.getOperand(0);
- SDValue AndOp1 = AndOp.getOperand(1);
-
- // Check if one operand of AndOp is XOR(Constant, a)
- SDValue XorOp, OtherOp;
- if (AndOp0.getOpcode() == ISD::XOR) {
- XorOp = AndOp0;
- OtherOp = AndOp1;
- } else if (AndOp1.getOpcode() == ISD::XOR) {
- XorOp = AndOp1;
- OtherOp = AndOp0;
- } else {
- goto skip_optimization;
- }
-
- // Check if XOR has a constant operand (and not all-ones constant to avoid NOT)
- if ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
- !isAllOnesConstant(XorOp.getOperand(0))) ||
- (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
- !isAllOnesConstant(XorOp.getOperand(1)))) {
- // Prevent infinite loops: only apply if OtherOp is not also a NOT
- if (OtherOp.getOpcode() == ISD::XOR &&
- DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
- isAllOnesConstant(OtherOp.getOperand(1))) {
- goto skip_optimization;
+ // If we found a valid pattern, check if the AND node has one use
+ if (AndOp && NotOp && AndOp.hasOneUse()) {
+ SDValue AndOp0 = AndOp.getOperand(0);
+ SDValue AndOp1 = AndOp.getOperand(1);
+
+ // Check if one operand of AndOp is XOR(Constant, a)
+ SDValue XorOp, OtherOp;
+ if (AndOp0.getOpcode() == ISD::XOR) {
+ XorOp = AndOp0;
+ OtherOp = AndOp1;
+ } else if (AndOp1.getOpcode() == ISD::XOR) {
+ XorOp = AndOp1;
+ OtherOp = AndOp0;
+ } else {
+ // No XOR found in AND operands, continue to next optimization
+ }
+
+ // If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT)
+ if (XorOp && OtherOp &&
+ ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
+ !isAllOnesConstant(XorOp.getOperand(0))) ||
+ (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
+ !isAllOnesConstant(XorOp.getOperand(1))))) {
+ // Prevent infinite loops: only apply if OtherOp is not also a NOT
+ if (!(OtherOp.getOpcode() == ISD::XOR &&
+ DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
+ isAllOnesConstant(OtherOp.getOperand(1)))) {
+ // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+ // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+ // This allows the andn (b & ~c) to be done in parallel with the xor
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+ return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
+ }
}
- // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
- // To: AND(XOR(Constant, a), AND(b, NOT(c)))
- // This allows the andn (b & ~c) to be done in parallel with the xor
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
- return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
}
}
-skip_optimization:
// reassociate and
if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
>From d9c1a7568fb75a69c7dcc7f2fef25d637e0b4f00 Mon Sep 17 00:00:00 2001
From: Manik Mukherjee <mkmrocks20 at gmail.com>
Date: Fri, 10 Oct 2025 00:26:23 -0400
Subject: [PATCH 5/5] addressed comments
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 79 ++++++++-----------
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 -
.../CodeGen/X86/constant-xor-and-andnot.ll | 35 +++++---
3 files changed, 57 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e92e1319bb0f8..45ddb02a96d92 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7501,60 +7501,43 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Optimize (Constant XOR a) & b & ~c -> (Constant XOR a) & (b & ~c)
// This allows the andn operation to be done in parallel with the xor
if (TLI.hasAndNot(N1) || TLI.hasAndNot(N0)) {
- // Look for pattern: AND(AND(XOR(Constant, a), b), NOT(c))
- // Transform to: AND(XOR(Constant, a), AND(b, NOT(c)))
-
- // Handle both operand orders: N0=AND, N1=NOT and N0=NOT, N1=AND
- SDValue AndOp, NotOp;
- if (N0.getOpcode() == ISD::AND &&
- N1.getOpcode() == ISD::XOR &&
- DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
- isAllOnesConstant(N1.getOperand(1))) {
- AndOp = N0;
- NotOp = N1;
- } else if (N1.getOpcode() == ISD::AND &&
- N0.getOpcode() == ISD::XOR &&
- DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
- isAllOnesConstant(N0.getOperand(1))) {
- AndOp = N1;
- NotOp = N0;
- } else {
- // Pattern doesn't match, continue to next optimization
- }
+ SDValue InnerAndOp0, InnerAndOp1, NotArg;
- // If we found a valid pattern, check if the AND node has one use
- if (AndOp && NotOp && AndOp.hasOneUse()) {
- SDValue AndOp0 = AndOp.getOperand(0);
- SDValue AndOp1 = AndOp.getOperand(1);
+ // Match: AND(AND(Op0, Op1), NOT(NotArg))
+ // where NOT is represented as XOR with all-ones
+ // m_And automatically handles commutativity
+ if (sd_match(N, m_And(m_OneUse(m_And(m_Value(InnerAndOp0),
+ m_Value(InnerAndOp1))),
+ m_Xor(m_Value(NotArg), m_AllOnes())))) {
- // Check if one operand of AndOp is XOR(Constant, a)
+ // Determine which operand is XOR(Constant, X) where Constant is not all-ones
SDValue XorOp, OtherOp;
- if (AndOp0.getOpcode() == ISD::XOR) {
- XorOp = AndOp0;
- OtherOp = AndOp1;
- } else if (AndOp1.getOpcode() == ISD::XOR) {
- XorOp = AndOp1;
- OtherOp = AndOp0;
+ APInt XorConst;
+
+ // Try first operand - m_Xor handles commutativity for XOR operands
+ if (sd_match(InnerAndOp0, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+ !XorConst.isAllOnes()) {
+ XorOp = InnerAndOp0;
+ OtherOp = InnerAndOp1;
+ } else if (sd_match(InnerAndOp1, m_Xor(m_ConstInt(XorConst), m_Value())) &&
+ !XorConst.isAllOnes()) {
+ XorOp = InnerAndOp1;
+ OtherOp = InnerAndOp0;
} else {
- // No XOR found in AND operands, continue to next optimization
+ // Pattern doesn't match - no XOR(Constant, X) found
+ XorOp = SDValue();
}
- // If we found XOR, check if it has a constant operand (and not all-ones constant to avoid NOT)
- if (XorOp && OtherOp &&
- ((DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(0)) &&
- !isAllOnesConstant(XorOp.getOperand(0))) ||
- (DAG.isConstantIntBuildVectorOrConstantInt(XorOp.getOperand(1)) &&
- !isAllOnesConstant(XorOp.getOperand(1))))) {
- // Prevent infinite loops: only apply if OtherOp is not also a NOT
- if (!(OtherOp.getOpcode() == ISD::XOR &&
- DAG.isConstantIntBuildVectorOrConstantInt(OtherOp.getOperand(1)) &&
- isAllOnesConstant(OtherOp.getOperand(1)))) {
- // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
- // To: AND(XOR(Constant, a), AND(b, NOT(c)))
- // This allows the andn (b & ~c) to be done in parallel with the xor
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
- return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
- }
+ // If we found the pattern, apply the transformation
+ // Prevent infinite loops by checking OtherOp is not also a NOT
+ if (XorOp && !sd_match(OtherOp, m_Xor(m_Value(), m_AllOnes()))) {
+ // Get the NOT node (either N0 or N1)
+ SDValue NotOp = sd_match(N0, m_Xor(m_Value(), m_AllOnes())) ? N0 : N1;
+
+ // Transform: AND(AND(XOR(Constant, a), b), NOT(c))
+ // To: AND(XOR(Constant, a), AND(b, NOT(c)))
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, OtherOp, NotOp);
+ return DAG.getNode(ISD::AND, DL, VT, XorOp, NewAnd);
}
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d32cb680594c6..cdc97faf394ca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51541,7 +51541,6 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-
/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
index 923d065962081..76056a413f904 100644
--- a/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
+++ b/llvm/test/CodeGen/X86/constant-xor-and-andnot.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=-bmi < %s | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefixes=CHECK,BMI
; Test the optimization described in issue #161630:
; (Constant XOR a) & b & ~c should compile to allow andn to be done in parallel with xor
@@ -7,9 +8,13 @@
define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot:
; CHECK: # %bb.0:
+; NOBMI-NEXT: movq %rdx, %rax
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT: andnq %rsi, %rdx, %rax
-; CHECK-NEXT: andq %rdi, %rax
+; NOBMI-NEXT: andq %rsi, %rdi
+; NOBMI-NEXT: notq %rax
+; NOBMI-NEXT: andq %rdi, %rax
+; BMI-NEXT: andnq %rsi, %rdx, %rax
+; BMI-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
@@ -21,9 +26,13 @@ define i64 @test_constant_xor_and_andnot(i64 %a, i64 %b, i64 %c) {
define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_32:
; CHECK: # %bb.0:
+; NOBMI-NEXT: movl %edx, %eax
; CHECK-NEXT: xorl $5678, %edi # imm = 0x162E
-; CHECK-NEXT: andnl %esi, %edx, %eax
-; CHECK-NEXT: andl %edi, %eax
+; NOBMI-NEXT: andl %esi, %edi
+; NOBMI-NEXT: notl %eax
+; NOBMI-NEXT: andl %edi, %eax
+; BMI-NEXT: andnl %esi, %edx, %eax
+; BMI-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%xor = xor i32 %a, 5678
%and1 = and i32 %xor, %b
@@ -36,9 +45,13 @@ define i32 @test_constant_xor_and_andnot_32(i32 %a, i32 %b, i32 %c) {
define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_swapped:
; CHECK: # %bb.0:
+; NOBMI-NEXT: movq %rdx, %rax
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT: andnq %rsi, %rdx, %rax
-; CHECK-NEXT: andq %rdi, %rax
+; NOBMI-NEXT: andq %rsi, %rdi
+; NOBMI-NEXT: notq %rax
+; NOBMI-NEXT: andq %rdi, %rax
+; BMI-NEXT: andnq %rsi, %rdx, %rax
+; BMI-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %b, %xor
@@ -51,9 +64,13 @@ define i64 @test_constant_xor_and_andnot_swapped(i64 %a, i64 %b, i64 %c) {
define i64 @test_constant_xor_and_andnot_final_swapped(i64 %a, i64 %b, i64 %c) {
; CHECK-LABEL: test_constant_xor_and_andnot_final_swapped:
; CHECK: # %bb.0:
+; NOBMI-NEXT: movq %rdx, %rax
; CHECK-NEXT: xorq $1234, %rdi # imm = 0x4D2
-; CHECK-NEXT: andnq %rsi, %rdx, %rax
-; CHECK-NEXT: andq %rdi, %rax
+; NOBMI-NEXT: andq %rsi, %rdi
+; NOBMI-NEXT: notq %rax
+; NOBMI-NEXT: andq %rdi, %rax
+; BMI-NEXT: andnq %rsi, %rdx, %rax
+; BMI-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%xor = xor i64 %a, 1234
%and1 = and i64 %xor, %b
More information about the llvm-commits
mailing list