[llvm] [InstCombine] Canonicalize complex boolean expressions into ~((y | z) ^ x) via 3-input truth table (PR #149530)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 05:38:42 PDT 2025
https://github.com/yafet-a updated https://github.com/llvm/llvm-project/pull/149530
>From cf2f9db7e20976f408f7d33fb84f8eb0bdca1e94 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Sat, 19 Jul 2025 15:02:06 -0700
Subject: [PATCH 1/6] [InstCombine] Add pre-commit tests for boolean
canonicalization (NFC)
---
llvm/test/Transforms/InstCombine/pr97044.ll | 99 +++++++++++++++++++++
1 file changed, 99 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/pr97044.ll
diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
new file mode 100644
index 0000000000000..e61fb76ab43ba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; Tests for GitHub issue #97044 - Boolean expression canonicalization
+define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test0_4way_or(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[Z:%.*]], -1
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[NOT]]
+; CHECK-NEXT: [[AND1:%.*]] = and i32 [[AND]], [[X:%.*]]
+; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT: [[AND3:%.*]] = and i32 [[X]], [[NOT2]]
+; CHECK-NEXT: [[AND4:%.*]] = and i32 [[AND3]], [[Z]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND1]], [[AND4]]
+; CHECK-NEXT: [[AND7_DEMORGAN:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT: [[AND9_DEMORGAN:%.*]] = or i32 [[AND7_DEMORGAN]], [[Z]]
+; CHECK-NEXT: [[AND9:%.*]] = xor i32 [[AND9_DEMORGAN]], -1
+; CHECK-NEXT: [[OR10:%.*]] = or i32 [[OR]], [[AND9]]
+; CHECK-NEXT: [[AND11:%.*]] = and i32 [[X]], [[Y]]
+; CHECK-NEXT: [[AND12:%.*]] = and i32 [[AND11]], [[Z]]
+; CHECK-NEXT: [[OR13:%.*]] = or i32 [[OR10]], [[AND12]]
+; CHECK-NEXT: ret i32 [[OR13]]
+;
+ %not = xor i32 %z, -1
+ %and = and i32 %y, %not
+ %and1 = and i32 %and, %x
+ %not2 = xor i32 %y, -1
+ %and3 = and i32 %x, %not2
+ %and4 = and i32 %and3, %z
+ %or = or i32 %and1, %and4
+ %not5 = xor i32 %x, -1
+ %not6 = xor i32 %y, -1
+ %and7 = and i32 %not5, %not6
+ %not8 = xor i32 %z, -1
+ %and9 = and i32 %and7, %not8
+ %or10 = or i32 %or, %and9
+ %and11 = and i32 %x, %y
+ %and12 = and i32 %and11, %z
+ %or13 = or i32 %or10, %and12
+ ret i32 %or13
+}
+define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test1_xor_pattern(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[AND4_DEMORGAN:%.*]] = or i32 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: [[AND8:%.*]] = and i32 [[Z]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND4_DEMORGAN]], -1
+; CHECK-NEXT: [[XOR:%.*]] = or i32 [[AND8]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[XOR]]
+;
+ %not = xor i32 %z, -1
+ %and = and i32 %x, %y
+ %not1 = xor i32 %x, -1
+ %not2 = xor i32 %y, -1
+ %and3 = and i32 %not1, %not2
+ %or = or i32 %and, %and3
+ %and4 = and i32 %not, %or
+ %and5 = and i32 %x, %y
+ %and6 = and i32 %x, %not2
+ %or7 = or i32 %and5, %and6
+ %and8 = and i32 %z, %or7
+ %xor = xor i32 %and4, %and8
+ ret i32 %xor
+}
+define i32 @test2_nested_xor(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test2_nested_xor(
+; CHECK-NEXT: [[NOT7:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT: [[AND8:%.*]] = and i32 [[Z:%.*]], [[NOT7]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[AND8]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, %y
+ %not = xor i32 %x, -1
+ %not1 = xor i32 %y, -1
+ %and2 = and i32 %not, %not1
+ %or = or i32 %and, %and2
+ %and3 = and i32 %x, %y
+ %not4 = xor i32 %y, -1
+ %and5 = and i32 %x, %not4
+ %or6 = or i32 %and3, %and5
+ %xor = xor i32 %or, %or6
+ %not7 = xor i32 %y, -1
+ %and8 = and i32 %z, %not7
+ %and9 = and i32 %xor, %and8
+ %xor10 = xor i32 %or, %and9
+ %xor11 = xor i32 %xor10, %y
+ %xor12 = xor i32 %xor11, -1
+ ret i32 %xor12
+}
+define i32 @test3_already_optimal(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test3_already_optimal(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR]], [[X:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[XOR]], -1
+; CHECK-NEXT: ret i32 [[NOT]]
+;
+ %or = or i32 %y, %z
+ %xor = xor i32 %or, %x
+ %not = xor i32 %xor, -1
+ ret i32 %not
+}
>From 3a55b19380ee8d41e897cc0f347c0dbc73f380fb Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Mon, 21 Jul 2025 05:08:39 -0700
Subject: [PATCH 2/6] [InstCombine] Optimised expressions in issue #97044
---
.../InstCombine/InstCombineAndOrXor.cpp | 56 +++++++++++++++++++
llvm/test/Transforms/InstCombine/pr97044.ll | 33 ++++-------
2 files changed, 66 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b231c04319106..088105f6ff9f8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3776,6 +3776,43 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // ((X & Y & ~Z) | (X & ~Y & Z) | (~X & ~Y &~Z) | (X & Y &Z)) -> ~((Y | Z) ^
+ // X)
+ {
+ Value *X, *Y, *Z;
+ Value *Term1, *Term2, *XAndYAndZ;
+ if (match(&I,
+ m_Or(m_Or(m_Value(Term1), m_Value(Term2)), m_Value(XAndYAndZ))) &&
+ match(XAndYAndZ, m_And(m_And(m_Value(X), m_Value(Y)), m_Value(Z)))) {
+ Value *YOrZ = Builder.CreateOr(Y, Z);
+ Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+ return BinaryOperator::CreateNot(YOrZXorX);
+ }
+ }
+
+ // (Z & X) | ~((Y ^ X) | Z) -> ~((Y | Z) ^ X)
+ {
+ Value *X, *Y, *Z;
+ Value *ZAndX, *NotPattern;
+
+ if (match(&I, m_c_Or(m_Value(ZAndX), m_Value(NotPattern))) &&
+ match(ZAndX, m_c_And(m_Value(Z), m_Value(X)))) {
+
+ Value *YXorXOrZ;
+ if (match(NotPattern, m_Not(m_Value(YXorXOrZ)))) {
+ Value *YXorX;
+ if (match(YXorXOrZ, m_c_Or(m_Value(YXorX), m_Specific(Z))) &&
+ match(YXorX, m_c_Xor(m_Value(Y), m_Specific(X)))) {
+
+ Value *YOrZ = Builder.CreateOr(Y, Z);
+ Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+ return BinaryOperator::CreateNot(YOrZXorX);
+ }
+ }
+ }
+ }
+
Type *Ty = I.getType();
if (Ty->isIntOrIntVectorTy(1)) {
if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
@@ -5182,6 +5219,25 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
}
}
+ // ((X & Y) | (~X & ~Y)) ^ (Z & (((X & Y) | (~X & ~Y)) ^ ((X & Y) | (X &
+ // ~Y)))) -> ~((Y | Z) ^ X)
+ if (match(Op1, m_AllOnes())) {
+ Value *X, *Y, *Z;
+ Value *XorWithY;
+ if (match(Op0, m_Xor(m_Value(XorWithY), m_Value(Y)))) {
+ Value *ZAndNotY;
+ if (match(XorWithY, m_Xor(m_Value(X), m_Value(ZAndNotY)))) {
+ Value *NotY;
+ if (match(ZAndNotY, m_And(m_Value(Z), m_Value(NotY))) &&
+ match(NotY, m_Not(m_Specific(Y)))) {
+ Value *YOrZ = Builder.CreateOr(Y, Z);
+ Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+ return BinaryOperator::CreateNot(YOrZXorX);
+ }
+ }
+ }
+ }
+
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (Value *V = foldXorOfICmps(LHS, RHS, I))
diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
index e61fb76ab43ba..9c9bf9aface25 100644
--- a/llvm/test/Transforms/InstCombine/pr97044.ll
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -3,20 +3,9 @@
; Tests for GitHub issue #97044 - Boolean expression canonicalization
define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @test0_4way_or(
-; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[Z:%.*]], -1
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[NOT]]
-; CHECK-NEXT: [[AND1:%.*]] = and i32 [[AND]], [[X:%.*]]
-; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[Y]], -1
-; CHECK-NEXT: [[AND3:%.*]] = and i32 [[X]], [[NOT2]]
-; CHECK-NEXT: [[AND4:%.*]] = and i32 [[AND3]], [[Z]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND1]], [[AND4]]
-; CHECK-NEXT: [[AND7_DEMORGAN:%.*]] = or i32 [[X]], [[Y]]
-; CHECK-NEXT: [[AND9_DEMORGAN:%.*]] = or i32 [[AND7_DEMORGAN]], [[Z]]
-; CHECK-NEXT: [[AND9:%.*]] = xor i32 [[AND9_DEMORGAN]], -1
-; CHECK-NEXT: [[OR10:%.*]] = or i32 [[OR]], [[AND9]]
-; CHECK-NEXT: [[AND11:%.*]] = and i32 [[X]], [[Y]]
-; CHECK-NEXT: [[AND12:%.*]] = and i32 [[AND11]], [[Z]]
-; CHECK-NEXT: [[OR13:%.*]] = or i32 [[OR10]], [[AND12]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[OR13:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: ret i32 [[OR13]]
;
%not = xor i32 %z, -1
@@ -39,11 +28,9 @@ define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
}
define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @test1_xor_pattern(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[AND4_DEMORGAN:%.*]] = or i32 [[TMP1]], [[Z:%.*]]
-; CHECK-NEXT: [[AND8:%.*]] = and i32 [[Z]], [[X]]
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND4_DEMORGAN]], -1
-; CHECK-NEXT: [[XOR:%.*]] = or i32 [[AND8]], [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: ret i32 [[XOR]]
;
%not = xor i32 %z, -1
@@ -62,10 +49,10 @@ define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) {
}
define i32 @test2_nested_xor(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @test2_nested_xor(
-; CHECK-NEXT: [[NOT7:%.*]] = xor i32 [[Y:%.*]], -1
-; CHECK-NEXT: [[AND8:%.*]] = and i32 [[Z:%.*]], [[NOT7]]
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[AND8]]
-; CHECK-NEXT: ret i32 [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], [[Y]]
+; CHECK-NEXT: ret i32 [[TMP3]]
;
%and = and i32 %x, %y
%not = xor i32 %x, -1
>From 02807e3052967f58bcc31e1d97e70c53f05dcb25 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Tue, 29 Jul 2025 06:39:23 -0700
Subject: [PATCH 3/6] 3 input handled via truth table
---
.../InstCombine/InstCombineAndOrXor.cpp | 255 ++++++++++++++----
1 file changed, 202 insertions(+), 53 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 088105f6ff9f8..563cc25b5463a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -19,6 +19,8 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <bitset>
+#include <map>
using namespace llvm;
using namespace PatternMatch;
@@ -47,6 +49,202 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
return Builder.CreateFCmpFMF(NewPred, LHS, RHS, FMF);
}
+/// Creates optimal 3-variable boolean logic from a truth table.
+/// Currently it supports only the cases pertaining to issue #97044. More cases
+/// can be added based on real-world justification for specific 3-input cases,
+/// or, with reviewer approval, all 256 cases can be added (perhaps choosing
+/// the canonicalizations found
+/// in x86InstCombine.cpp?).
+static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
+ Value *Op1, Value *Op2, Value *Root,
+ IRBuilderBase &Builder, bool HasOneUse) {
+ uint8_t TruthValue = Table.to_ulong();
+
+ // Skip transformation if expression is already simple (at most 2 levels
+ // deep).
+ if (Root->hasOneUse() && isa<BinaryOperator>(Root)) {
+ if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
+ bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
+ !isa<BinaryOperator>(BO->getOperand(1));
+ if (IsSimple)
+ return nullptr;
+ }
+ }
+
+ auto FoldConstant = [&](bool Val) {
+ Constant *Res = Val ? Builder.getTrue() : Builder.getFalse();
+ if (Op0->getType()->isVectorTy())
+ Res = ConstantVector::getSplat(
+ cast<VectorType>(Op0->getType())->getElementCount(), Res);
+ return Res;
+ };
+
+ Value *Result = nullptr;
+ switch (TruthValue) {
+ default:
+ return nullptr;
+
+ case 0x00: // Always FALSE
+ Result = FoldConstant(false);
+ break;
+
+ case 0xFF: // Always TRUE
+ Result = FoldConstant(true);
+ break;
+
+ case 0xE1: // ~((Op1 | Op2) ^ Op0)
+ if (!HasOneUse)
+ return nullptr;
+ {
+ Value *Or = Builder.CreateOr(Op1, Op2);
+ Value *Xor = Builder.CreateXor(Or, Op0);
+ Result = Builder.CreateNot(Xor);
+ }
+ break;
+
+ case 0x60: // Op0 & (Op1 ^ Op2)
+ if (!HasOneUse)
+ return nullptr;
+ {
+ Value *Xor = Builder.CreateXor(Op1, Op2);
+ Result = Builder.CreateAnd(Op0, Xor);
+ }
+ break;
+
+ case 0xD2: // ((Op1 | Op2) ^ Op0) ^ Op1
+ if (!HasOneUse)
+ return nullptr;
+ {
+ Value *Or = Builder.CreateOr(Op1, Op2);
+ Value *Xor1 = Builder.CreateXor(Or, Op0);
+ Result = Builder.CreateXor(Xor1, Op1);
+ }
+ break;
+ }
+
+ return Result;
+}
+
+static std::tuple<Value *, Value *, Value *>
+extractThreeVariables(Value *Root) {
+ std::set<Value *> Variables;
+ unsigned NodeCount = 0;
+ const unsigned MaxNodes =
+ 50; // To prevent exponential blowup (see bitwise-hang.ll)
+
+ std::function<void(Value *)> Collect = [&](Value *V) {
+ if (++NodeCount > MaxNodes)
+ return;
+
+ Value *NotV;
+ if (match(V, m_Not(m_Value(NotV)))) {
+ Collect(NotV);
+ return;
+ }
+ if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+ Collect(BO->getOperand(0));
+ Collect(BO->getOperand(1));
+ } else if (isa<Argument>(V) || isa<Instruction>(V)) {
+ if (!isa<Constant>(V) && V != Root) {
+ Variables.insert(V);
+ }
+ }
+ };
+
+ Collect(Root);
+
+ // Bail if we hit the node limit
+ if (NodeCount > MaxNodes)
+ return {nullptr, nullptr, nullptr};
+
+ if (Variables.size() == 3) {
+ auto It = Variables.begin();
+ Value *Op0 = *It++;
+ Value *Op1 = *It++;
+ Value *Op2 = *It;
+ return {Op0, Op1, Op2};
+ }
+ return {nullptr, nullptr, nullptr};
+}
+
+/// Evaluate a boolean expression with concrete variable values.
+static std::optional<bool>
+evaluateBooleanExpression(Value *Expr, const std::map<Value *, bool> &Values) {
+ if (auto It = Values.find(Expr); It != Values.end()) {
+ return It->second;
+ }
+ Value *NotExpr;
+ if (match(Expr, m_Not(m_Value(NotExpr)))) {
+ auto Operand = evaluateBooleanExpression(NotExpr, Values);
+ if (Operand)
+ return !*Operand;
+ return std::nullopt;
+ }
+ if (auto *BO = dyn_cast<BinaryOperator>(Expr)) {
+ auto LHS = evaluateBooleanExpression(BO->getOperand(0), Values);
+ auto RHS = evaluateBooleanExpression(BO->getOperand(1), Values);
+ if (!LHS || !RHS)
+ return std::nullopt;
+
+ switch (BO->getOpcode()) {
+ case Instruction::And:
+ return *LHS && *RHS;
+ case Instruction::Or:
+ return *LHS || *RHS;
+ case Instruction::Xor:
+ return *LHS != *RHS;
+ default:
+ return std::nullopt;
+ }
+ }
+ return std::nullopt;
+}
+
+/// Extracts the truth table from a 3-variable boolean expression.
+/// The truth table is an 8-bit integer where each bit corresponds to one
+/// possible combination of the three variables.
+/// The bits are ordered as follows:
+/// 000, 001, 010, 011, 100, 101, 110, 111
+/// The result is a bitset where the i-th bit is set if the expression is true
+/// for the i-th combination of the variables.
+static std::optional<std::bitset<8>>
+extractThreeBitTruthTable(Value *Expr, Value *Op0, Value *Op1, Value *Op2) {
+ std::bitset<8> Table;
+ for (int I = 0; I < 8; I++) {
+ bool Val0 = (I >> 2) & 1;
+ bool Val1 = (I >> 1) & 1;
+ bool Val2 = I & 1;
+ std::map<Value *, bool> Values = {{Op0, Val0}, {Op1, Val1}, {Op2, Val2}};
+ auto Result = evaluateBooleanExpression(Expr, Values);
+ if (!Result)
+ return std::nullopt;
+ Table[I] = *Result;
+ }
+ return Table;
+}
+
+/// Try to canonicalize 3-variable boolean expressions using truth table lookup.
+static Value *foldThreeVarBoolExpr(Value *Root,
+ InstCombiner::BuilderTy &Builder) {
+ // Only proceed if this is a "complex" expression.
+ if (!isa<BinaryOperator>(Root))
+ return nullptr;
+
+ auto [Op0, Op1, Op2] = extractThreeVariables(Root);
+ if (!Op0 || !Op1 || !Op2)
+ return nullptr;
+
+ auto Table = extractThreeBitTruthTable(Root, Op0, Op1, Op2);
+ if (!Table)
+ return nullptr;
+
+ // Only transform expressions with single use to avoid code growth.
+ if (!Root->hasOneUse())
+ return nullptr;
+
+ return createLogicFromTable3Var(*Table, Op0, Op1, Op2, Root, Builder, true);
+}
+
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
/// whether to treat V, Lo, and Hi as signed or not.
@@ -3777,41 +3975,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // ((X & Y & ~Z) | (X & ~Y & Z) | (~X & ~Y &~Z) | (X & Y &Z)) -> ~((Y | Z) ^
- // X)
- {
- Value *X, *Y, *Z;
- Value *Term1, *Term2, *XAndYAndZ;
- if (match(&I,
- m_Or(m_Or(m_Value(Term1), m_Value(Term2)), m_Value(XAndYAndZ))) &&
- match(XAndYAndZ, m_And(m_And(m_Value(X), m_Value(Y)), m_Value(Z)))) {
- Value *YOrZ = Builder.CreateOr(Y, Z);
- Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
- return BinaryOperator::CreateNot(YOrZXorX);
- }
- }
-
- // (Z & X) | ~((Y ^ X) | Z) -> ~((Y | Z) ^ X)
- {
- Value *X, *Y, *Z;
- Value *ZAndX, *NotPattern;
-
- if (match(&I, m_c_Or(m_Value(ZAndX), m_Value(NotPattern))) &&
- match(ZAndX, m_c_And(m_Value(Z), m_Value(X)))) {
-
- Value *YXorXOrZ;
- if (match(NotPattern, m_Not(m_Value(YXorXOrZ)))) {
- Value *YXorX;
- if (match(YXorXOrZ, m_c_Or(m_Value(YXorX), m_Specific(Z))) &&
- match(YXorX, m_c_Xor(m_Value(Y), m_Specific(X)))) {
-
- Value *YOrZ = Builder.CreateOr(Y, Z);
- Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
- return BinaryOperator::CreateNot(YOrZXorX);
- }
- }
- }
- }
+ if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+ return replaceInstUsesWith(I, Canonical);
Type *Ty = I.getType();
if (Ty->isIntOrIntVectorTy(1)) {
@@ -5219,24 +5384,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
}
}
- // ((X & Y) | (~X & ~Y)) ^ (Z & (((X & Y) | (~X & ~Y)) ^ ((X & Y) | (X &
- // ~Y)))) -> ~((Y | Z) ^ X)
- if (match(Op1, m_AllOnes())) {
- Value *X, *Y, *Z;
- Value *XorWithY;
- if (match(Op0, m_Xor(m_Value(XorWithY), m_Value(Y)))) {
- Value *ZAndNotY;
- if (match(XorWithY, m_Xor(m_Value(X), m_Value(ZAndNotY)))) {
- Value *NotY;
- if (match(ZAndNotY, m_And(m_Value(Z), m_Value(NotY))) &&
- match(NotY, m_Not(m_Specific(Y)))) {
- Value *YOrZ = Builder.CreateOr(Y, Z);
- Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
- return BinaryOperator::CreateNot(YOrZXorX);
- }
- }
- }
- }
+ if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+ return replaceInstUsesWith(I, Canonical);
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
>From d066a8516a68e5694612533f4fca4d1d618cc4e1 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 7 Aug 2025 01:51:34 -0700
Subject: [PATCH 4/6] Move simple expression check to caller
---
.../InstCombine/InstCombineAndOrXor.cpp | 23 +++++++++----------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 563cc25b5463a..0bd589a7a3f7e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -59,18 +59,6 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
Value *Op1, Value *Op2, Value *Root,
IRBuilderBase &Builder, bool HasOneUse) {
uint8_t TruthValue = Table.to_ulong();
-
- // Skip transformation if expression is already simple (at most 2 levels
- // deep).
- if (Root->hasOneUse() && isa<BinaryOperator>(Root)) {
- if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
- bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
- !isa<BinaryOperator>(BO->getOperand(1));
- if (IsSimple)
- return nullptr;
- }
- }
-
auto FoldConstant = [&](bool Val) {
Constant *Res = Val ? Builder.getTrue() : Builder.getFalse();
if (Op0->getType()->isVectorTy())
@@ -230,6 +218,17 @@ static Value *foldThreeVarBoolExpr(Value *Root,
if (!isa<BinaryOperator>(Root))
return nullptr;
+ // Skip transformation if expression is already simple (at most 2 levels
+ // deep).
+ if (Root->hasOneUse() && isa<BinaryOperator>(Root)) {
+ if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
+ bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
+ !isa<BinaryOperator>(BO->getOperand(1));
+ if (IsSimple)
+ return nullptr;
+ }
+ }
+
auto [Op0, Op1, Op2] = extractThreeVariables(Root);
if (!Op0 || !Op1 || !Op2)
return nullptr;
>From 4c86e5467b464c5887195fbfc492a48e43b6f675 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 7 Aug 2025 04:01:26 -0700
Subject: [PATCH 5/6] removed recursion + smallptrset used
---
.../InstCombine/InstCombineAndOrXor.cpp | 124 +++++++++++-------
1 file changed, 79 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 0bd589a7a3f7e..f356cdef891d3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -20,7 +20,6 @@
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <bitset>
-#include <map>
using namespace llvm;
using namespace PatternMatch;
@@ -115,77 +114,109 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
static std::tuple<Value *, Value *, Value *>
extractThreeVariables(Value *Root) {
- std::set<Value *> Variables;
+ SmallPtrSet<Value *, 3> Variables;
unsigned NodeCount = 0;
- const unsigned MaxNodes =
- 50; // To prevent exponential blowup (see bitwise-hang.ll)
+ const unsigned MaxNodes = 50; // To prevent exponential blowup with loop
+ // unrolling(see bitreverse-hang.ll)
- std::function<void(Value *)> Collect = [&](Value *V) {
- if (++NodeCount > MaxNodes)
- return;
+ SmallVector<Value *> Worklist;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty() && NodeCount <= MaxNodes) {
+ Value *V = Worklist.pop_back_val();
+ ++NodeCount;
+
+ if (NodeCount > MaxNodes)
+ break;
Value *NotV;
if (match(V, m_Not(m_Value(NotV)))) {
- Collect(NotV);
- return;
+ Worklist.push_back(NotV);
+ continue;
}
if (auto *BO = dyn_cast<BinaryOperator>(V)) {
- Collect(BO->getOperand(0));
- Collect(BO->getOperand(1));
+ Worklist.push_back(BO->getOperand(0));
+ Worklist.push_back(BO->getOperand(1));
} else if (isa<Argument>(V) || isa<Instruction>(V)) {
if (!isa<Constant>(V) && V != Root) {
Variables.insert(V);
}
}
- };
-
- Collect(Root);
+ }
// Bail if we hit the node limit
if (NodeCount > MaxNodes)
return {nullptr, nullptr, nullptr};
if (Variables.size() == 3) {
- auto It = Variables.begin();
- Value *Op0 = *It++;
- Value *Op1 = *It++;
- Value *Op2 = *It;
- return {Op0, Op1, Op2};
+ // Sort variables by pointer value; NOTE(review): order can differ across runs
+ SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
+ llvm::sort(SortedVars, [](Value *A, Value *B) { return A < B; });
+ return {SortedVars[0], SortedVars[1], SortedVars[2]};
}
return {nullptr, nullptr, nullptr};
}
/// Evaluate a boolean expression with concrete variable values.
static std::optional<bool>
-evaluateBooleanExpression(Value *Expr, const std::map<Value *, bool> &Values) {
- if (auto It = Values.find(Expr); It != Values.end()) {
- return It->second;
- }
- Value *NotExpr;
- if (match(Expr, m_Not(m_Value(NotExpr)))) {
- auto Operand = evaluateBooleanExpression(NotExpr, Values);
- if (Operand)
- return !*Operand;
- return std::nullopt;
+evaluateBooleanExpression(Value *Expr,
+ const SmallMapVector<Value *, bool, 4> &Values) {
+
+ // Post-order traversal of the expression tree
+ SmallVector<Instruction *> Instructions;
+ SmallVector<Value *> ToVisit;
+ SmallPtrSet<Instruction *, 8> Seen;
+
+ ToVisit.push_back(Expr);
+ while (!ToVisit.empty()) {
+ Value *V = ToVisit.pop_back_val();
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (Seen.insert(I).second) {
+ Instructions.push_back(I);
+ for (Value *Op : I->operands()) {
+ ToVisit.push_back(Op);
+ }
+ }
+ }
}
- if (auto *BO = dyn_cast<BinaryOperator>(Expr)) {
- auto LHS = evaluateBooleanExpression(BO->getOperand(0), Values);
- auto RHS = evaluateBooleanExpression(BO->getOperand(1), Values);
- if (!LHS || !RHS)
- return std::nullopt;
- switch (BO->getOpcode()) {
- case Instruction::And:
- return *LHS && *RHS;
- case Instruction::Or:
- return *LHS || *RHS;
- case Instruction::Xor:
- return *LHS != *RHS;
- default:
- return std::nullopt;
+ llvm::sort(Instructions,
+ [](Instruction *A, Instruction *B) { return A->comesBefore(B); });
+
+ // Now in topological order we can evaluate the expression
+ SmallDenseMap<Value *, bool> Computed(Values.begin(), Values.end());
+
+ for (Instruction *I : Instructions) {
+ Value *NotV;
+ if (match(I, m_Not(m_Value(NotV)))) {
+ auto It = Computed.find(NotV);
+ if (It == Computed.end())
+ return std::nullopt;
+ Computed[I] = !It->second;
+ } else if (auto *BO = dyn_cast<BinaryOperator>(I)) {
+ auto LHSIt = Computed.find(BO->getOperand(0));
+ auto RHSIt = Computed.find(BO->getOperand(1));
+ if (LHSIt == Computed.end() || RHSIt == Computed.end())
+ return std::nullopt;
+
+ switch (BO->getOpcode()) {
+ case Instruction::And:
+ Computed[I] = LHSIt->second && RHSIt->second;
+ break;
+ case Instruction::Or:
+ Computed[I] = LHSIt->second || RHSIt->second;
+ break;
+ case Instruction::Xor:
+ Computed[I] = LHSIt->second != RHSIt->second;
+ break;
+ default:
+ return std::nullopt;
+ }
}
}
- return std::nullopt;
+
+ auto It = Computed.find(Expr);
+ return It != Computed.end() ? std::optional<bool>(It->second) : std::nullopt;
}
/// Extracts the truth table from a 3-variable boolean expression.
@@ -202,7 +233,10 @@ extractThreeBitTruthTable(Value *Expr, Value *Op0, Value *Op1, Value *Op2) {
bool Val0 = (I >> 2) & 1;
bool Val1 = (I >> 1) & 1;
bool Val2 = I & 1;
- std::map<Value *, bool> Values = {{Op0, Val0}, {Op1, Val1}, {Op2, Val2}};
+ SmallMapVector<Value *, bool, 4> Values;
+ Values[Op0] = Val0;
+ Values[Op1] = Val1;
+ Values[Op2] = Val2;
auto Result = evaluateBooleanExpression(Expr, Values);
if (!Result)
return std::nullopt;
>From 7a2dc671645ac32403009104e55bf956d94ed21f Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 7 Aug 2025 05:37:47 -0700
Subject: [PATCH 6/6] moved calls to consistent location in each visit function
+ added call to visitAnd
---
.../InstCombine/InstCombineAndOrXor.cpp | 36 ++++++++++---------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index f356cdef891d3..71f7d21f55348 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -252,15 +252,18 @@ static Value *foldThreeVarBoolExpr(Value *Root,
if (!isa<BinaryOperator>(Root))
return nullptr;
+ // Early bailout for expressions without exactly one use (avoids expensive
+ // analysis; see andorxor.ll)
+ if (!Root->hasOneUse())
+ return nullptr;
+
// Skip transformation if expression is already simple (at most 2 levels
// deep).
- if (Root->hasOneUse() && isa<BinaryOperator>(Root)) {
- if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
- bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
- !isa<BinaryOperator>(BO->getOperand(1));
- if (IsSimple)
- return nullptr;
- }
+ if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
+ bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
+ !isa<BinaryOperator>(BO->getOperand(1));
+ if (IsSimple)
+ return nullptr;
}
auto [Op0, Op1, Op2] = extractThreeVariables(Root);
@@ -271,10 +274,6 @@ static Value *foldThreeVarBoolExpr(Value *Root,
if (!Table)
return nullptr;
- // Only transform expressions with single use to avoid code growth.
- if (!Root->hasOneUse())
- return nullptr;
-
return createLogicFromTable3Var(*Table, Op0, Op1, Op2, Root, Builder, true);
}
@@ -2628,6 +2627,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
+ if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+ return replaceInstUsesWith(I, Canonical);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
@@ -3985,6 +3987,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
+ if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+ return replaceInstUsesWith(I, Canonical);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
@@ -4008,9 +4013,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
- return replaceInstUsesWith(I, Canonical);
-
Type *Ty = I.getType();
if (Ty->isIntOrIntVectorTy(1)) {
if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
@@ -5136,6 +5138,9 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
+ if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+ return replaceInstUsesWith(I, Canonical);
+
if (Instruction *NewXor = foldXorToXor(I, Builder))
return NewXor;
@@ -5417,9 +5422,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
}
}
- if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
- return replaceInstUsesWith(I, Canonical);
-
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (Value *V = foldXorOfICmps(LHS, RHS, I))
More information about the llvm-commits
mailing list