[llvm] [InstCombine] Canonicalize complex boolean expressions into ~((y | z) ^ x) via 3-input truth table (PR #149530)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 29 05:12:54 PDT 2025


https://github.com/yafet-a updated https://github.com/llvm/llvm-project/pull/149530

>From cf2f9db7e20976f408f7d33fb84f8eb0bdca1e94 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Sat, 19 Jul 2025 15:02:06 -0700
Subject: [PATCH 01/25] [InstCombine] Add pre-commit tests for boolean
 canonicalization (NFC)

---
 llvm/test/Transforms/InstCombine/pr97044.ll | 99 +++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/pr97044.ll

diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
new file mode 100644
index 0000000000000..e61fb76ab43ba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; Tests for GitHub issue #97044 - Boolean expression canonicalization
+define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test0_4way_or(
+; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[Z:%.*]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[Y:%.*]], [[NOT]]
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[AND]], [[X:%.*]]
+; CHECK-NEXT:    [[NOT2:%.*]] = xor i32 [[Y]], -1
+; CHECK-NEXT:    [[AND3:%.*]] = and i32 [[X]], [[NOT2]]
+; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[AND3]], [[Z]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND1]], [[AND4]]
+; CHECK-NEXT:    [[AND7_DEMORGAN:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[AND9_DEMORGAN:%.*]] = or i32 [[AND7_DEMORGAN]], [[Z]]
+; CHECK-NEXT:    [[AND9:%.*]] = xor i32 [[AND9_DEMORGAN]], -1
+; CHECK-NEXT:    [[OR10:%.*]] = or i32 [[OR]], [[AND9]]
+; CHECK-NEXT:    [[AND11:%.*]] = and i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[AND12:%.*]] = and i32 [[AND11]], [[Z]]
+; CHECK-NEXT:    [[OR13:%.*]] = or i32 [[OR10]], [[AND12]]
+; CHECK-NEXT:    ret i32 [[OR13]]
+;
+  %not = xor i32 %z, -1
+  %and = and i32 %y, %not
+  %and1 = and i32 %and, %x
+  %not2 = xor i32 %y, -1
+  %and3 = and i32 %x, %not2
+  %and4 = and i32 %and3, %z
+  %or = or i32 %and1, %and4
+  %not5 = xor i32 %x, -1
+  %not6 = xor i32 %y, -1
+  %and7 = and i32 %not5, %not6
+  %not8 = xor i32 %z, -1
+  %and9 = and i32 %and7, %not8
+  %or10 = or i32 %or, %and9
+  %and11 = and i32 %x, %y
+  %and12 = and i32 %and11, %z
+  %or13 = or i32 %or10, %and12
+  ret i32 %or13
+}
+define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test1_xor_pattern(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND4_DEMORGAN:%.*]] = or i32 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[Z]], [[X]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[AND4_DEMORGAN]], -1
+; CHECK-NEXT:    [[XOR:%.*]] = or i32 [[AND8]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %not = xor i32 %z, -1
+  %and = and i32 %x, %y
+  %not1 = xor i32 %x, -1
+  %not2 = xor i32 %y, -1
+  %and3 = and i32 %not1, %not2
+  %or = or i32 %and, %and3
+  %and4 = and i32 %not, %or
+  %and5 = and i32 %x, %y
+  %and6 = and i32 %x, %not2
+  %or7 = or i32 %and5, %and6
+  %and8 = and i32 %z, %or7
+  %xor = xor i32 %and4, %and8
+  ret i32 %xor
+}
+define i32 @test2_nested_xor(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test2_nested_xor(
+; CHECK-NEXT:    [[NOT7:%.*]] = xor i32 [[Y:%.*]], -1
+; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[Z:%.*]], [[NOT7]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[AND8]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %and = and i32 %x, %y
+  %not = xor i32 %x, -1
+  %not1 = xor i32 %y, -1
+  %and2 = and i32 %not, %not1
+  %or = or i32 %and, %and2
+  %and3 = and i32 %x, %y
+  %not4 = xor i32 %y, -1
+  %and5 = and i32 %x, %not4
+  %or6 = or i32 %and3, %and5
+  %xor = xor i32 %or, %or6
+  %not7 = xor i32 %y, -1
+  %and8 = and i32 %z, %not7
+  %and9 = and i32 %xor, %and8
+  %xor10 = xor i32 %or, %and9
+  %xor11 = xor i32 %xor10, %y
+  %xor12 = xor i32 %xor11, -1
+  ret i32 %xor12
+}
+define i32 @test3_already_optimal(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test3_already_optimal(
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR]], [[X:%.*]]
+; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[XOR]], -1
+; CHECK-NEXT:    ret i32 [[NOT]]
+;
+  %or = or i32 %y, %z
+  %xor = xor i32 %or, %x
+  %not = xor i32 %xor, -1
+  ret i32 %not
+}

>From 3a55b19380ee8d41e897cc0f347c0dbc73f380fb Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Mon, 21 Jul 2025 05:08:39 -0700
Subject: [PATCH 02/25] [InstCombine] Optimised expressions in issue #97044

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 56 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/pr97044.ll   | 33 ++++-------
 2 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b231c04319106..088105f6ff9f8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3776,6 +3776,43 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // ((X & Y & ~Z) | (X & ~Y & Z) | (~X & ~Y &~Z) | (X & Y &Z)) -> ~((Y | Z) ^
+  // X)
+  {
+    Value *X, *Y, *Z;
+    Value *Term1, *Term2, *XAndYAndZ;
+    if (match(&I,
+              m_Or(m_Or(m_Value(Term1), m_Value(Term2)), m_Value(XAndYAndZ))) &&
+        match(XAndYAndZ, m_And(m_And(m_Value(X), m_Value(Y)), m_Value(Z)))) {
+      Value *YOrZ = Builder.CreateOr(Y, Z);
+      Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+      return BinaryOperator::CreateNot(YOrZXorX);
+    }
+  }
+
+  // (Z & X) | ~((Y ^ X) | Z) -> ~((Y | Z) ^ X)
+  {
+    Value *X, *Y, *Z;
+    Value *ZAndX, *NotPattern;
+
+    if (match(&I, m_c_Or(m_Value(ZAndX), m_Value(NotPattern))) &&
+        match(ZAndX, m_c_And(m_Value(Z), m_Value(X)))) {
+
+      Value *YXorXOrZ;
+      if (match(NotPattern, m_Not(m_Value(YXorXOrZ)))) {
+        Value *YXorX;
+        if (match(YXorXOrZ, m_c_Or(m_Value(YXorX), m_Specific(Z))) &&
+            match(YXorX, m_c_Xor(m_Value(Y), m_Specific(X)))) {
+
+          Value *YOrZ = Builder.CreateOr(Y, Z);
+          Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+          return BinaryOperator::CreateNot(YOrZXorX);
+        }
+      }
+    }
+  }
+
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1)) {
     if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
@@ -5182,6 +5219,25 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
     }
   }
 
+  // ((X & Y) | (~X & ~Y)) ^ (Z & (((X & Y) | (~X & ~Y)) ^ ((X & Y) | (X &
+  // ~Y)))) -> ~((Y | Z) ^ X)
+  if (match(Op1, m_AllOnes())) {
+    Value *X, *Y, *Z;
+    Value *XorWithY;
+    if (match(Op0, m_Xor(m_Value(XorWithY), m_Value(Y)))) {
+      Value *ZAndNotY;
+      if (match(XorWithY, m_Xor(m_Value(X), m_Value(ZAndNotY)))) {
+        Value *NotY;
+        if (match(ZAndNotY, m_And(m_Value(Z), m_Value(NotY))) &&
+            match(NotY, m_Not(m_Specific(Y)))) {
+          Value *YOrZ = Builder.CreateOr(Y, Z);
+          Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+          return BinaryOperator::CreateNot(YOrZXorX);
+        }
+      }
+    }
+  }
+
   if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
     if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
       if (Value *V = foldXorOfICmps(LHS, RHS, I))
diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
index e61fb76ab43ba..9c9bf9aface25 100644
--- a/llvm/test/Transforms/InstCombine/pr97044.ll
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -3,20 +3,9 @@
 ; Tests for GitHub issue #97044 - Boolean expression canonicalization
 define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @test0_4way_or(
-; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[Z:%.*]], -1
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[Y:%.*]], [[NOT]]
-; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[AND]], [[X:%.*]]
-; CHECK-NEXT:    [[NOT2:%.*]] = xor i32 [[Y]], -1
-; CHECK-NEXT:    [[AND3:%.*]] = and i32 [[X]], [[NOT2]]
-; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[AND3]], [[Z]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND1]], [[AND4]]
-; CHECK-NEXT:    [[AND7_DEMORGAN:%.*]] = or i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[AND9_DEMORGAN:%.*]] = or i32 [[AND7_DEMORGAN]], [[Z]]
-; CHECK-NEXT:    [[AND9:%.*]] = xor i32 [[AND9_DEMORGAN]], -1
-; CHECK-NEXT:    [[OR10:%.*]] = or i32 [[OR]], [[AND9]]
-; CHECK-NEXT:    [[AND11:%.*]] = and i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[AND12:%.*]] = and i32 [[AND11]], [[Z]]
-; CHECK-NEXT:    [[OR13:%.*]] = or i32 [[OR10]], [[AND12]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[OR13:%.*]] = xor i32 [[TMP2]], -1
 ; CHECK-NEXT:    ret i32 [[OR13]]
 ;
   %not = xor i32 %z, -1
@@ -39,11 +28,9 @@ define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
 }
 define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @test1_xor_pattern(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[AND4_DEMORGAN:%.*]] = or i32 [[TMP1]], [[Z:%.*]]
-; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[Z]], [[X]]
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[AND4_DEMORGAN]], -1
-; CHECK-NEXT:    [[XOR:%.*]] = or i32 [[AND8]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP2]], -1
 ; CHECK-NEXT:    ret i32 [[XOR]]
 ;
   %not = xor i32 %z, -1
@@ -62,10 +49,10 @@ define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) {
 }
 define i32 @test2_nested_xor(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @test2_nested_xor(
-; CHECK-NEXT:    [[NOT7:%.*]] = xor i32 [[Y:%.*]], -1
-; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[Z:%.*]], [[NOT7]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[AND8]]
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP2]], [[Y]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %and = and i32 %x, %y
   %not = xor i32 %x, -1

>From 02807e3052967f58bcc31e1d97e70c53f05dcb25 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Tue, 29 Jul 2025 06:39:23 -0700
Subject: [PATCH 03/25] 3 input handled via truth table

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 255 ++++++++++++++----
 1 file changed, 202 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 088105f6ff9f8..563cc25b5463a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -19,6 +19,8 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include <bitset>
+#include <map>
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -47,6 +49,202 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
   return Builder.CreateFCmpFMF(NewPred, LHS, RHS, FMF);
 }
 
+/// Creates optimal 3-variable boolean logic from a truth table.
+/// Currently it supports only the cases pertaining to issue #97044. More cases
+/// can be added based on real-world justification for specific 3-input cases,
+/// or, with reviewer approval, all 256 cases can be added (choosing the
+/// canonicalizations found
+/// in x86InstCombine.cpp?).
+static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
+                                       Value *Op1, Value *Op2, Value *Root,
+                                       IRBuilderBase &Builder, bool HasOneUse) {
+  uint8_t TruthValue = Table.to_ulong();
+
+  // Skip transformation if expression is already simple (at most 2 levels
+  // deep).
+  if (Root->hasOneUse() && isa<BinaryOperator>(Root)) {
+    if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
+      bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
+                      !isa<BinaryOperator>(BO->getOperand(1));
+      if (IsSimple)
+        return nullptr;
+    }
+  }
+
+  auto FoldConstant = [&](bool Val) {
+    Constant *Res = Val ? Builder.getTrue() : Builder.getFalse();
+    if (Op0->getType()->isVectorTy())
+      Res = ConstantVector::getSplat(
+          cast<VectorType>(Op0->getType())->getElementCount(), Res);
+    return Res;
+  };
+
+  Value *Result = nullptr;
+  switch (TruthValue) {
+  default:
+    return nullptr;
+
+  case 0x00: // Always FALSE
+    Result = FoldConstant(false);
+    break;
+
+  case 0xFF: // Always TRUE
+    Result = FoldConstant(true);
+    break;
+
+  case 0xE1: // ~((Op1 | Op2) ^ Op0)
+    if (!HasOneUse)
+      return nullptr;
+    {
+      Value *Or = Builder.CreateOr(Op1, Op2);
+      Value *Xor = Builder.CreateXor(Or, Op0);
+      Result = Builder.CreateNot(Xor);
+    }
+    break;
+
+  case 0x60: // Op0 & (Op1 ^ Op2)
+    if (!HasOneUse)
+      return nullptr;
+    {
+      Value *Xor = Builder.CreateXor(Op1, Op2);
+      Result = Builder.CreateAnd(Op0, Xor);
+    }
+    break;
+
+  case 0xD2: // ((Op1 | Op2) ^ Op0) ^ Op1
+    if (!HasOneUse)
+      return nullptr;
+    {
+      Value *Or = Builder.CreateOr(Op1, Op2);
+      Value *Xor1 = Builder.CreateXor(Or, Op0);
+      Result = Builder.CreateXor(Xor1, Op1);
+    }
+    break;
+  }
+
+  return Result;
+}
+
+static std::tuple<Value *, Value *, Value *>
+extractThreeVariables(Value *Root) {
+  std::set<Value *> Variables;
+  unsigned NodeCount = 0;
+  const unsigned MaxNodes =
+      50; // To prevent exponential blowup (see bitwise-hang.ll)
+
+  std::function<void(Value *)> Collect = [&](Value *V) {
+    if (++NodeCount > MaxNodes)
+      return;
+
+    Value *NotV;
+    if (match(V, m_Not(m_Value(NotV)))) {
+      Collect(NotV);
+      return;
+    }
+    if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+      Collect(BO->getOperand(0));
+      Collect(BO->getOperand(1));
+    } else if (isa<Argument>(V) || isa<Instruction>(V)) {
+      if (!isa<Constant>(V) && V != Root) {
+        Variables.insert(V);
+      }
+    }
+  };
+
+  Collect(Root);
+
+  // Bail if we hit the node limit
+  if (NodeCount > MaxNodes)
+    return {nullptr, nullptr, nullptr};
+
+  if (Variables.size() == 3) {
+    auto It = Variables.begin();
+    Value *Op0 = *It++;
+    Value *Op1 = *It++;
+    Value *Op2 = *It;
+    return {Op0, Op1, Op2};
+  }
+  return {nullptr, nullptr, nullptr};
+}
+
+/// Evaluate a boolean expression with concrete variable values.
+static std::optional<bool>
+evaluateBooleanExpression(Value *Expr, const std::map<Value *, bool> &Values) {
+  if (auto It = Values.find(Expr); It != Values.end()) {
+    return It->second;
+  }
+  Value *NotExpr;
+  if (match(Expr, m_Not(m_Value(NotExpr)))) {
+    auto Operand = evaluateBooleanExpression(NotExpr, Values);
+    if (Operand)
+      return !*Operand;
+    return std::nullopt;
+  }
+  if (auto *BO = dyn_cast<BinaryOperator>(Expr)) {
+    auto LHS = evaluateBooleanExpression(BO->getOperand(0), Values);
+    auto RHS = evaluateBooleanExpression(BO->getOperand(1), Values);
+    if (!LHS || !RHS)
+      return std::nullopt;
+
+    switch (BO->getOpcode()) {
+    case Instruction::And:
+      return *LHS && *RHS;
+    case Instruction::Or:
+      return *LHS || *RHS;
+    case Instruction::Xor:
+      return *LHS != *RHS;
+    default:
+      return std::nullopt;
+    }
+  }
+  return std::nullopt;
+}
+
+/// Extracts the truth table from a 3-variable boolean expression.
+/// The truth table is an 8-bit integer where each bit corresponds to a possible
+/// combination of the three variables.
+/// The bits are ordered as follows:
+/// 000, 001, 010, 011, 100, 101, 110, 111
+/// The result is a bitset where the i-th bit is set if the expression is true
+/// for the i-th combination of the variables.
+static std::optional<std::bitset<8>>
+extractThreeBitTruthTable(Value *Expr, Value *Op0, Value *Op1, Value *Op2) {
+  std::bitset<8> Table;
+  for (int I = 0; I < 8; I++) {
+    bool Val0 = (I >> 2) & 1;
+    bool Val1 = (I >> 1) & 1;
+    bool Val2 = I & 1;
+    std::map<Value *, bool> Values = {{Op0, Val0}, {Op1, Val1}, {Op2, Val2}};
+    auto Result = evaluateBooleanExpression(Expr, Values);
+    if (!Result)
+      return std::nullopt;
+    Table[I] = *Result;
+  }
+  return Table;
+}
+
+/// Try to canonicalize 3-variable boolean expressions using truth table lookup.
+static Value *foldThreeVarBoolExpr(Value *Root,
+                                   InstCombiner::BuilderTy &Builder) {
+  // Only proceed if this is a "complex" expression.
+  if (!isa<BinaryOperator>(Root))
+    return nullptr;
+
+  auto [Op0, Op1, Op2] = extractThreeVariables(Root);
+  if (!Op0 || !Op1 || !Op2)
+    return nullptr;
+
+  auto Table = extractThreeBitTruthTable(Root, Op0, Op1, Op2);
+  if (!Table)
+    return nullptr;
+
+  // Only transform expressions with single use to avoid code growth.
+  if (!Root->hasOneUse())
+    return nullptr;
+
+  return createLogicFromTable3Var(*Table, Op0, Op1, Op2, Root, Builder, true);
+}
+
 /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
 /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
 /// whether to treat V, Lo, and Hi as signed or not.
@@ -3777,41 +3975,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  // ((X & Y & ~Z) | (X & ~Y & Z) | (~X & ~Y &~Z) | (X & Y &Z)) -> ~((Y | Z) ^
-  // X)
-  {
-    Value *X, *Y, *Z;
-    Value *Term1, *Term2, *XAndYAndZ;
-    if (match(&I,
-              m_Or(m_Or(m_Value(Term1), m_Value(Term2)), m_Value(XAndYAndZ))) &&
-        match(XAndYAndZ, m_And(m_And(m_Value(X), m_Value(Y)), m_Value(Z)))) {
-      Value *YOrZ = Builder.CreateOr(Y, Z);
-      Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
-      return BinaryOperator::CreateNot(YOrZXorX);
-    }
-  }
-
-  // (Z & X) | ~((Y ^ X) | Z) -> ~((Y | Z) ^ X)
-  {
-    Value *X, *Y, *Z;
-    Value *ZAndX, *NotPattern;
-
-    if (match(&I, m_c_Or(m_Value(ZAndX), m_Value(NotPattern))) &&
-        match(ZAndX, m_c_And(m_Value(Z), m_Value(X)))) {
-
-      Value *YXorXOrZ;
-      if (match(NotPattern, m_Not(m_Value(YXorXOrZ)))) {
-        Value *YXorX;
-        if (match(YXorXOrZ, m_c_Or(m_Value(YXorX), m_Specific(Z))) &&
-            match(YXorX, m_c_Xor(m_Value(Y), m_Specific(X)))) {
-
-          Value *YOrZ = Builder.CreateOr(Y, Z);
-          Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
-          return BinaryOperator::CreateNot(YOrZXorX);
-        }
-      }
-    }
-  }
+  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+    return replaceInstUsesWith(I, Canonical);
 
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1)) {
@@ -5219,24 +5384,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
     }
   }
 
-  // ((X & Y) | (~X & ~Y)) ^ (Z & (((X & Y) | (~X & ~Y)) ^ ((X & Y) | (X &
-  // ~Y)))) -> ~((Y | Z) ^ X)
-  if (match(Op1, m_AllOnes())) {
-    Value *X, *Y, *Z;
-    Value *XorWithY;
-    if (match(Op0, m_Xor(m_Value(XorWithY), m_Value(Y)))) {
-      Value *ZAndNotY;
-      if (match(XorWithY, m_Xor(m_Value(X), m_Value(ZAndNotY)))) {
-        Value *NotY;
-        if (match(ZAndNotY, m_And(m_Value(Z), m_Value(NotY))) &&
-            match(NotY, m_Not(m_Specific(Y)))) {
-          Value *YOrZ = Builder.CreateOr(Y, Z);
-          Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
-          return BinaryOperator::CreateNot(YOrZXorX);
-        }
-      }
-    }
-  }
+  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+    return replaceInstUsesWith(I, Canonical);
 
   if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
     if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))

>From d066a8516a68e5694612533f4fca4d1d618cc4e1 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 7 Aug 2025 01:51:34 -0700
Subject: [PATCH 04/25] Move simple expression check to caller

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 563cc25b5463a..0bd589a7a3f7e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -59,18 +59,6 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
                                        Value *Op1, Value *Op2, Value *Root,
                                        IRBuilderBase &Builder, bool HasOneUse) {
   uint8_t TruthValue = Table.to_ulong();
-
-  // Skip transformation if expression is already simple (at most 2 levels
-  // deep).
-  if (Root->hasOneUse() && isa<BinaryOperator>(Root)) {
-    if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
-      bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
-                      !isa<BinaryOperator>(BO->getOperand(1));
-      if (IsSimple)
-        return nullptr;
-    }
-  }
-
   auto FoldConstant = [&](bool Val) {
     Constant *Res = Val ? Builder.getTrue() : Builder.getFalse();
     if (Op0->getType()->isVectorTy())
@@ -230,6 +218,17 @@ static Value *foldThreeVarBoolExpr(Value *Root,
   if (!isa<BinaryOperator>(Root))
     return nullptr;
 
+  // Skip transformation if expression is already simple (at most 2 levels
+  // deep).
+  if (Root->hasOneUse() && isa<BinaryOperator>(Root)) {
+    if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
+      bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
+                      !isa<BinaryOperator>(BO->getOperand(1));
+      if (IsSimple)
+        return nullptr;
+    }
+  }
+
   auto [Op0, Op1, Op2] = extractThreeVariables(Root);
   if (!Op0 || !Op1 || !Op2)
     return nullptr;

>From 4c86e5467b464c5887195fbfc492a48e43b6f675 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 7 Aug 2025 04:01:26 -0700
Subject: [PATCH 05/25] removed recursion + smallptrset used

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 124 +++++++++++-------
 1 file changed, 79 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 0bd589a7a3f7e..f356cdef891d3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -20,7 +20,6 @@
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <bitset>
-#include <map>
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -115,77 +114,109 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
 
 static std::tuple<Value *, Value *, Value *>
 extractThreeVariables(Value *Root) {
-  std::set<Value *> Variables;
+  SmallPtrSet<Value *, 3> Variables;
   unsigned NodeCount = 0;
-  const unsigned MaxNodes =
-      50; // To prevent exponential blowup (see bitwise-hang.ll)
+  const unsigned MaxNodes = 50; // To prevent exponential blowup with loop
+                                // unrolling(see bitreverse-hang.ll)
 
-  std::function<void(Value *)> Collect = [&](Value *V) {
-    if (++NodeCount > MaxNodes)
-      return;
+  SmallVector<Value *> Worklist;
+  Worklist.push_back(Root);
+
+  while (!Worklist.empty() && NodeCount <= MaxNodes) {
+    Value *V = Worklist.pop_back_val();
+    ++NodeCount;
+
+    if (NodeCount > MaxNodes)
+      break;
 
     Value *NotV;
     if (match(V, m_Not(m_Value(NotV)))) {
-      Collect(NotV);
-      return;
+      Worklist.push_back(NotV);
+      continue;
     }
     if (auto *BO = dyn_cast<BinaryOperator>(V)) {
-      Collect(BO->getOperand(0));
-      Collect(BO->getOperand(1));
+      Worklist.push_back(BO->getOperand(0));
+      Worklist.push_back(BO->getOperand(1));
     } else if (isa<Argument>(V) || isa<Instruction>(V)) {
       if (!isa<Constant>(V) && V != Root) {
         Variables.insert(V);
       }
     }
-  };
-
-  Collect(Root);
+  }
 
   // Bail if we hit the node limit
   if (NodeCount > MaxNodes)
     return {nullptr, nullptr, nullptr};
 
   if (Variables.size() == 3) {
-    auto It = Variables.begin();
-    Value *Op0 = *It++;
-    Value *Op1 = *It++;
-    Value *Op2 = *It;
-    return {Op0, Op1, Op2};
+    // Sort variables by pointer value to ensure deterministic ordering
+    SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
+    llvm::sort(SortedVars, [](Value *A, Value *B) { return A < B; });
+    return {SortedVars[0], SortedVars[1], SortedVars[2]};
   }
   return {nullptr, nullptr, nullptr};
 }
 
 /// Evaluate a boolean expression with concrete variable values.
 static std::optional<bool>
-evaluateBooleanExpression(Value *Expr, const std::map<Value *, bool> &Values) {
-  if (auto It = Values.find(Expr); It != Values.end()) {
-    return It->second;
-  }
-  Value *NotExpr;
-  if (match(Expr, m_Not(m_Value(NotExpr)))) {
-    auto Operand = evaluateBooleanExpression(NotExpr, Values);
-    if (Operand)
-      return !*Operand;
-    return std::nullopt;
+evaluateBooleanExpression(Value *Expr,
+                          const SmallMapVector<Value *, bool, 4> &Values) {
+
+  // Collect every instruction reachable from Expr through its operands (DFS).
+  SmallVector<Instruction *> Instructions;
+  SmallVector<Value *> ToVisit;
+  SmallPtrSet<Instruction *, 8> Seen;
+
+  ToVisit.push_back(Expr);
+  while (!ToVisit.empty()) {
+    Value *V = ToVisit.pop_back_val();
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      if (Seen.insert(I).second) {
+        Instructions.push_back(I);
+        for (Value *Op : I->operands()) {
+          ToVisit.push_back(Op);
+        }
+      }
+    }
   }
-  if (auto *BO = dyn_cast<BinaryOperator>(Expr)) {
-    auto LHS = evaluateBooleanExpression(BO->getOperand(0), Values);
-    auto RHS = evaluateBooleanExpression(BO->getOperand(1), Values);
-    if (!LHS || !RHS)
-      return std::nullopt;
 
-    switch (BO->getOpcode()) {
-    case Instruction::And:
-      return *LHS && *RHS;
-    case Instruction::Or:
-      return *LHS || *RHS;
-    case Instruction::Xor:
-      return *LHS != *RHS;
-    default:
-      return std::nullopt;
+  llvm::sort(Instructions,
+             [](Instruction *A, Instruction *B) { return A->comesBefore(B); });
+
+  //  Now in topological order we can evaluate the expression
+  SmallDenseMap<Value *, bool> Computed(Values.begin(), Values.end());
+
+  for (Instruction *I : Instructions) {
+    Value *NotV;
+    if (match(I, m_Not(m_Value(NotV)))) {
+      auto It = Computed.find(NotV);
+      if (It == Computed.end())
+        return std::nullopt;
+      Computed[I] = !It->second;
+    } else if (auto *BO = dyn_cast<BinaryOperator>(I)) {
+      auto LHSIt = Computed.find(BO->getOperand(0));
+      auto RHSIt = Computed.find(BO->getOperand(1));
+      if (LHSIt == Computed.end() || RHSIt == Computed.end())
+        return std::nullopt;
+
+      switch (BO->getOpcode()) {
+      case Instruction::And:
+        Computed[I] = LHSIt->second && RHSIt->second;
+        break;
+      case Instruction::Or:
+        Computed[I] = LHSIt->second || RHSIt->second;
+        break;
+      case Instruction::Xor:
+        Computed[I] = LHSIt->second != RHSIt->second;
+        break;
+      default:
+        return std::nullopt;
+      }
     }
   }
-  return std::nullopt;
+
+  auto It = Computed.find(Expr);
+  return It != Computed.end() ? std::optional<bool>(It->second) : std::nullopt;
 }
 
 /// Extracts the truth table from a 3-variable boolean expression.
@@ -202,7 +233,10 @@ extractThreeBitTruthTable(Value *Expr, Value *Op0, Value *Op1, Value *Op2) {
     bool Val0 = (I >> 2) & 1;
     bool Val1 = (I >> 1) & 1;
     bool Val2 = I & 1;
-    std::map<Value *, bool> Values = {{Op0, Val0}, {Op1, Val1}, {Op2, Val2}};
+    SmallMapVector<Value *, bool, 4> Values;
+    Values[Op0] = Val0;
+    Values[Op1] = Val1;
+    Values[Op2] = Val2;
     auto Result = evaluateBooleanExpression(Expr, Values);
     if (!Result)
       return std::nullopt;

>From 7a2dc671645ac32403009104e55bf956d94ed21f Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 7 Aug 2025 05:37:47 -0700
Subject: [PATCH 06/25] moved calls to consistent location in each visit
 function + added call to visitAnd

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 36 ++++++++++---------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index f356cdef891d3..71f7d21f55348 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -252,15 +252,18 @@ static Value *foldThreeVarBoolExpr(Value *Root,
   if (!isa<BinaryOperator>(Root))
     return nullptr;
 
+  // Early bailout unless the expression has exactly one use (avoids expensive
+  // analysis; see andorxor.ll)
+  if (!Root->hasOneUse())
+    return nullptr;
+
   // Skip transformation if expression is already simple (at most 2 levels
   // deep).
-  if (Root->hasOneUse() && isa<BinaryOperator>(Root)) {
-    if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
-      bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
-                      !isa<BinaryOperator>(BO->getOperand(1));
-      if (IsSimple)
-        return nullptr;
-    }
+  if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
+    bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
+                    !isa<BinaryOperator>(BO->getOperand(1));
+    if (IsSimple)
+      return nullptr;
   }
 
   auto [Op0, Op1, Op2] = extractThreeVariables(Root);
@@ -271,10 +274,6 @@ static Value *foldThreeVarBoolExpr(Value *Root,
   if (!Table)
     return nullptr;
 
-  // Only transform expressions with single use to avoid code growth.
-  if (!Root->hasOneUse())
-    return nullptr;
-
   return createLogicFromTable3Var(*Table, Op0, Op1, Op2, Root, Builder, true);
 }
 
@@ -2628,6 +2627,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
+  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+    return replaceInstUsesWith(I, Canonical);
+
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
@@ -3985,6 +3987,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
+  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+    return replaceInstUsesWith(I, Canonical);
+
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
@@ -4008,9 +4013,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
-    return replaceInstUsesWith(I, Canonical);
-
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1)) {
     if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
@@ -5136,6 +5138,9 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
+  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+    return replaceInstUsesWith(I, Canonical);
+
   if (Instruction *NewXor = foldXorToXor(I, Builder))
     return NewXor;
 
@@ -5417,9 +5422,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
     }
   }
 
-  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
-    return replaceInstUsesWith(I, Canonical);
-
   if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
     if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
       if (Value *V = foldXorOfICmps(LHS, RHS, I))

>From 6772db523c4db9c45e571cee098945fb11b8a422 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Mon, 11 Aug 2025 03:53:31 -0700
Subject: [PATCH 07/25] traverse only if node belongs to expr tree

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 31 +++++++++----------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 71f7d21f55348..f78864e547b43 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -115,28 +115,31 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
 static std::tuple<Value *, Value *, Value *>
 extractThreeVariables(Value *Root) {
   SmallPtrSet<Value *, 3> Variables;
-  unsigned NodeCount = 0;
-  const unsigned MaxNodes = 50; // To prevent exponential blowup with loop
-                                // unrolling(see bitreverse-hang.ll)
-
+  SmallPtrSet<Value *, 32> Visited; // Prevent hanging during loop unrolling
+                                    // (see bitreverse-hang.ll)
   SmallVector<Value *> Worklist;
   Worklist.push_back(Root);
 
-  while (!Worklist.empty() && NodeCount <= MaxNodes) {
+  while (!Worklist.empty()) {
     Value *V = Worklist.pop_back_val();
-    ++NodeCount;
-
-    if (NodeCount > MaxNodes)
-      break;
 
     Value *NotV;
     if (match(V, m_Not(m_Value(NotV)))) {
-      Worklist.push_back(NotV);
+      Visited.insert(NotV);
+      if (V == Root ||
+          V->hasOneUse()) { // Due to lack of cost-based heuristic, only
+                            // traverse if it belongs to this expression tree
+        Worklist.push_back(NotV);
+      }
       continue;
     }
     if (auto *BO = dyn_cast<BinaryOperator>(V)) {
-      Worklist.push_back(BO->getOperand(0));
-      Worklist.push_back(BO->getOperand(1));
+      if (V == Root || V->hasOneUse()) {
+        Visited.insert(BO->getOperand(0));
+        Visited.insert(BO->getOperand(1));
+        Worklist.push_back(BO->getOperand(0));
+        Worklist.push_back(BO->getOperand(1));
+      }
     } else if (isa<Argument>(V) || isa<Instruction>(V)) {
       if (!isa<Constant>(V) && V != Root) {
         Variables.insert(V);
@@ -144,10 +147,6 @@ extractThreeVariables(Value *Root) {
     }
   }
 
-  // Bail if we hit the node limit
-  if (NodeCount > MaxNodes)
-    return {nullptr, nullptr, nullptr};
-
   if (Variables.size() == 3) {
     // Sort variables by pointer value to ensure deterministic ordering
     SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());

>From 5405486dd970588dd65ff6b177832d97620f0c21 Mon Sep 17 00:00:00 2001
From: YafetBeyene <127161378+yafet-a at users.noreply.github.com>
Date: Mon, 11 Aug 2025 15:28:54 +0100
Subject: [PATCH 08/25] Refactor

Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index f78864e547b43..e3122738e33b6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -59,11 +59,8 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
                                        IRBuilderBase &Builder, bool HasOneUse) {
   uint8_t TruthValue = Table.to_ulong();
   auto FoldConstant = [&](bool Val) {
-    Constant *Res = Val ? Builder.getTrue() : Builder.getFalse();
-    if (Op0->getType()->isVectorTy())
-      Res = ConstantVector::getSplat(
-          cast<VectorType>(Op0->getType())->getElementCount(), Res);
-    return Res;
+    Type *Ty = Op0->getType();
+    return Val ? ConstantInt::getTrue(Ty) : ConstantInt::getFalse(Ty);
   };
 
   Value *Result = nullptr;

>From 650a7ab467efcd9d136c28a5a4d7251d25081041 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Wed, 13 Aug 2025 02:19:14 -0700
Subject: [PATCH 09/25] check for instructions being in the same bb to avoid
 comesBefore cross bb assertion

---
 .../Transforms/InstCombine/InstCombineAndOrXor.cpp    | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index ca325dde5678b..95b7780a70380 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -179,6 +179,17 @@ evaluateBooleanExpression(Value *Expr,
     }
   }
 
+  // Check for instructions being in the same BB
+  if (!Instructions.empty()) {
+    BasicBlock *FirstBB = Instructions.front()->getParent();
+    if (!llvm::all_of(Instructions, [FirstBB](Instruction *I) {
+          return I->getParent() == FirstBB;
+        })) {
+      return std::nullopt;
+    }
+  }
+
+  // Sort instructions within the same BB
   llvm::sort(Instructions,
              [](Instruction *A, Instruction *B) { return A->comesBefore(B); });
 

>From 18e576e683cb268e81fdd6bfcc8b035287db8208 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 14 Aug 2025 07:20:53 -0700
Subject: [PATCH 10/25] review (batch evaluation, refactors)

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 176 +++++++-----------
 1 file changed, 69 insertions(+), 107 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 95b7780a70380..1e8c0715ab502 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -59,7 +59,7 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
 /// in x86InstCombine.cpp?)
 static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
                                        Value *Op1, Value *Op2, Value *Root,
-                                       IRBuilderBase &Builder, bool HasOneUse) {
+                                       IRBuilderBase &Builder) {
   uint8_t TruthValue = Table.to_ulong();
   auto FoldConstant = [&](bool Val) {
     Type *Ty = Op0->getType();
@@ -70,43 +70,29 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
   switch (TruthValue) {
   default:
     return nullptr;
-
   case 0x00: // Always FALSE
     Result = FoldConstant(false);
     break;
-
   case 0xFF: // Always TRUE
     Result = FoldConstant(true);
     break;
-
   case 0xE1: // ~((Op1 | Op2) ^ Op0)
-    if (!HasOneUse)
-      return nullptr;
-    {
-      Value *Or = Builder.CreateOr(Op1, Op2);
-      Value *Xor = Builder.CreateXor(Or, Op0);
-      Result = Builder.CreateNot(Xor);
-    }
-    break;
-
+  {
+    Value *Or = Builder.CreateOr(Op1, Op2);
+    Value *Xor = Builder.CreateXor(Or, Op0);
+    Result = Builder.CreateNot(Xor);
+  } break;
   case 0x60: // Op0 & (Op1 ^ Op2)
-    if (!HasOneUse)
-      return nullptr;
-    {
-      Value *Xor = Builder.CreateXor(Op1, Op2);
-      Result = Builder.CreateAnd(Op0, Xor);
-    }
-    break;
-
+  {
+    Value *Xor = Builder.CreateXor(Op1, Op2);
+    Result = Builder.CreateAnd(Op0, Xor);
+  } break;
   case 0xD2: // ((Op1 | Op2) ^ Op0) ^ Op1
-    if (!HasOneUse)
-      return nullptr;
-    {
-      Value *Or = Builder.CreateOr(Op1, Op2);
-      Value *Xor1 = Builder.CreateXor(Or, Op0);
-      Result = Builder.CreateXor(Xor1, Op1);
-    }
-    break;
+  {
+    Value *Or = Builder.CreateOr(Op1, Op2);
+    Value *Xor1 = Builder.CreateXor(Or, Op0);
+    Result = Builder.CreateXor(Xor1, Op1);
+  } break;
   }
 
   return Result;
@@ -120,6 +106,9 @@ extractThreeVariables(Value *Root) {
   SmallVector<Value *> Worklist;
   Worklist.push_back(Root);
 
+  // Track all instructions to ensure they're in the same BB
+  BasicBlock *FirstBB = nullptr;
+
   while (!Worklist.empty()) {
     Value *V = Worklist.pop_back_val();
 
@@ -134,6 +123,15 @@ extractThreeVariables(Value *Root) {
       continue;
     }
     if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+      if (!BO->isBitwiseLogicOp())
+        return {nullptr, nullptr, nullptr};
+
+      // Check BB consistency
+      if (!FirstBB)
+        FirstBB = BO->getParent();
+      else if (BO->getParent() != FirstBB)
+        return {nullptr, nullptr, nullptr};
+
       if (V == Root || V->hasOneUse()) {
         Visited.insert(BO->getOperand(0));
         Visited.insert(BO->getOperand(1));
@@ -148,19 +146,22 @@ extractThreeVariables(Value *Root) {
   }
 
   if (Variables.size() == 3) {
-    // Sort variables by pointer value to ensure deterministic ordering
+    // Sort variables by instruction order
     SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
-    llvm::sort(SortedVars, [](Value *A, Value *B) { return A < B; });
+    llvm::sort(SortedVars, [](Value *A, Value *B) {
+      if (auto *IA = dyn_cast<Instruction>(A))
+        if (auto *IB = dyn_cast<Instruction>(B))
+          return IA->comesBefore(IB);
+      return A < B;
+    });
     return {SortedVars[0], SortedVars[1], SortedVars[2]};
   }
   return {nullptr, nullptr, nullptr};
 }
 
-/// Evaluate a boolean expression with concrete variable values.
-static std::optional<bool>
-evaluateBooleanExpression(Value *Expr,
-                          const SmallMapVector<Value *, bool, 4> &Values) {
-
+/// Evaluate a boolean expression with bit-vector inputs for all 8 combinations.
+static std::optional<std::bitset<8>>
+evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2) {
   // Post-order traversal of the expression tree
   SmallVector<Instruction *> Instructions;
   SmallVector<Value *> ToVisit;
@@ -179,45 +180,42 @@ evaluateBooleanExpression(Value *Expr,
     }
   }
 
-  // Check for instructions being in the same BB
-  if (!Instructions.empty()) {
-    BasicBlock *FirstBB = Instructions.front()->getParent();
-    if (!llvm::all_of(Instructions, [FirstBB](Instruction *I) {
-          return I->getParent() == FirstBB;
-        })) {
-      return std::nullopt;
-    }
-  }
-
   // Sort instructions within the same BB
   llvm::sort(Instructions,
              [](Instruction *A, Instruction *B) { return A->comesBefore(B); });
 
-  //  Now in topological order we can evaluate the expression
-  SmallDenseMap<Value *, bool> Computed(Values.begin(), Values.end());
+  // Initialize bit-vector values for the 3 variables
+  // Op0: 0b11110000 (true for combinations 000,001,010,011)
+  // Op1: 0b11001100 (true for combinations 000,001,100,101)
+  // Op2: 0b10101010 (true for combinations 000,010,100,110)
+  SmallDenseMap<Value *, std::bitset<8>> Computed;
+  Computed[Op0] = std::bitset<8>(0xF0); // 11110000
+  Computed[Op1] = std::bitset<8>(0xCC); // 11001100
+  Computed[Op2] = std::bitset<8>(0xAA); // 10101010
 
   for (Instruction *I : Instructions) {
     Value *NotV;
     if (match(I, m_Not(m_Value(NotV)))) {
-      auto It = Computed.find(NotV);
-      if (It == Computed.end())
+      if (!Computed.count(NotV))
         return std::nullopt;
-      Computed[I] = !It->second;
+      Computed[I] = ~Computed.at(NotV); // Bitwise NOT
     } else if (auto *BO = dyn_cast<BinaryOperator>(I)) {
-      auto LHSIt = Computed.find(BO->getOperand(0));
-      auto RHSIt = Computed.find(BO->getOperand(1));
-      if (LHSIt == Computed.end() || RHSIt == Computed.end())
+      if (!Computed.count(BO->getOperand(0)) ||
+          !Computed.count(BO->getOperand(1)))
         return std::nullopt;
 
+      auto &LHS = Computed.at(BO->getOperand(0));
+      auto &RHS = Computed.at(BO->getOperand(1));
+
       switch (BO->getOpcode()) {
       case Instruction::And:
-        Computed[I] = LHSIt->second && RHSIt->second;
+        Computed[I] = LHS & RHS; // Bitwise AND
         break;
       case Instruction::Or:
-        Computed[I] = LHSIt->second || RHSIt->second;
+        Computed[I] = LHS | RHS; // Bitwise OR
         break;
       case Instruction::Xor:
-        Computed[I] = LHSIt->second != RHSIt->second;
+        Computed[I] = LHS ^ RHS; // Bitwise XOR
         break;
       default:
         return std::nullopt;
@@ -226,65 +224,30 @@ evaluateBooleanExpression(Value *Expr,
   }
 
   auto It = Computed.find(Expr);
-  return It != Computed.end() ? std::optional<bool>(It->second) : std::nullopt;
-}
-
-/// Extracts the truth table from a 3-variable boolean expression.
-/// The truth table is a 8-bit integer where each bit corresponds to a possible
-/// combination of the three variables.
-/// The bits are ordered as follows:
-/// 000, 001, 010, 011, 100, 101, 110, 111
-/// The result is a bitset where the i-th bit is set if the expression is true
-/// for the i-th combination of the variables.
-static std::optional<std::bitset<8>>
-extractThreeBitTruthTable(Value *Expr, Value *Op0, Value *Op1, Value *Op2) {
-  std::bitset<8> Table;
-  for (int I = 0; I < 8; I++) {
-    bool Val0 = (I >> 2) & 1;
-    bool Val1 = (I >> 1) & 1;
-    bool Val2 = I & 1;
-    SmallMapVector<Value *, bool, 4> Values;
-    Values[Op0] = Val0;
-    Values[Op1] = Val1;
-    Values[Op2] = Val2;
-    auto Result = evaluateBooleanExpression(Expr, Values);
-    if (!Result)
-      return std::nullopt;
-    Table[I] = *Result;
-  }
-  return Table;
+  return It != Computed.end() ? std::optional<std::bitset<8>>(It->second)
+                              : std::nullopt;
 }
 
 /// Try to canonicalize 3-variable boolean expressions using truth table lookup.
-static Value *foldThreeVarBoolExpr(Value *Root,
+static Value *foldThreeVarBoolExpr(Instruction &Root,
                                    InstCombiner::BuilderTy &Builder) {
-  // Only proceed if this is a "complex" expression.
-  if (!isa<BinaryOperator>(Root))
-    return nullptr;
 
-  // Early bailout for expressions with too many uses (avoid expensive analysis
-  // andorxor.ll)
-  if (!Root->hasOneUse())
-    return nullptr;
+  auto &BO = cast<BinaryOperator>(Root);
+  assert(BO.isBitwiseLogicOp() && "Unexpected opcode for boolean expression");
 
-  // Skip transformation if expression is already simple (at most 2 levels
-  // deep).
-  if (auto *BO = dyn_cast<BinaryOperator>(Root)) {
-    bool IsSimple = !isa<BinaryOperator>(BO->getOperand(0)) ||
-                    !isa<BinaryOperator>(BO->getOperand(1));
-    if (IsSimple)
-      return nullptr;
-  }
+  if (!isa<BinaryOperator>(BO.getOperand(0)) ||
+      !isa<BinaryOperator>(BO.getOperand(1)))
+    return nullptr;
 
-  auto [Op0, Op1, Op2] = extractThreeVariables(Root);
+  auto [Op0, Op1, Op2] = extractThreeVariables(&Root);
   if (!Op0 || !Op1 || !Op2)
     return nullptr;
 
-  auto Table = extractThreeBitTruthTable(Root, Op0, Op1, Op2);
+  auto Table = evaluateBooleanExpression(&Root, Op0, Op1, Op2);
   if (!Table)
     return nullptr;
 
-  return createLogicFromTable3Var(*Table, Op0, Op1, Op2, Root, Builder, true);
+  return createLogicFromTable3Var(*Table, Op0, Op1, Op2, &Root, Builder);
 }
 
 /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
@@ -2637,7 +2600,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
-  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+  if (Value *Canonical = foldThreeVarBoolExpr(I, Builder))
     return replaceInstUsesWith(I, Canonical);
 
   // See if we can simplify any instructions used by the instruction whose sole
@@ -4145,7 +4108,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
-  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+  if (Value *Canonical = foldThreeVarBoolExpr(I, Builder))
     return replaceInstUsesWith(I, Canonical);
 
   // See if we can simplify any instructions used by the instruction whose sole
@@ -4173,7 +4136,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1)) {
     if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
@@ -5299,7 +5261,7 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
-  if (Value *Canonical = foldThreeVarBoolExpr(&I, Builder))
+  if (Value *Canonical = foldThreeVarBoolExpr(I, Builder))
     return replaceInstUsesWith(I, Canonical);
 
   if (Instruction *NewXor = foldXorToXor(I, Builder))

>From 28d4a0f791529d740ee01a26a480eeb9466671e1 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 14 Aug 2025 07:21:49 -0700
Subject: [PATCH 11/25] Add negative tests

---
 llvm/test/Transforms/InstCombine/pr97044.ll | 68 +++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
index 9c9bf9aface25..b94662f4135d3 100644
--- a/llvm/test/Transforms/InstCombine/pr97044.ll
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -84,3 +84,71 @@ define i32 @test3_already_optimal(i32 %x, i32 %y, i32 %z) {
   %not = xor i32 %xor, -1
   ret i32 %not
 }
+; Negative Tests
+; Test with non-bitwise operation (should not transform - add/sub not supported)
+define i32 @negative_non_bitwise_add(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_non_bitwise_add(
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD2]]
+;
+  %add1 = add i32 %x, %y
+  %add2 = add i32 %add1, %z
+  ret i32 %add2
+}
+; Test with only 2 variables (should not transform - needs exactly 3 variables)
+define i32 @negative_two_variables(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_two_variables(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[AND]], -1
+; CHECK-NEXT:    ret i32 [[NOT]]
+;
+  %and = and i32 %x, %y
+  %not = xor i32 %and, -1
+  ret i32 %not
+}
+; Test with 4 variables (should not transform - needs exactly 3 variables)
+define i32 @negative_four_variables(i32 %x, i32 %y, i32 %z, i32 %w) {
+; CHECK-LABEL: @negative_four_variables(
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[Z:%.*]], [[W:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND1]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and1 = and i32 %x, %y
+  %and2 = and i32 %z, %w
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+; Test with simple 2-level expression (should not transform - not complex enough)
+define i32 @negative_simple_expression(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_simple_expression(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and = and i32 %x, %y
+  %or = or i32 %and, %z
+  ret i32 %or
+}
+; Test with instructions in different basic blocks (should not transform)
+define i32 @negative_different_basic_blocks(i32 %x, i32 %y, i32 %z, i1 %cond) {
+; CHECK-LABEL: @negative_different_basic_blocks(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[AND2]]
+; CHECK:       if.false:
+; CHECK-NEXT:    ret i32 [[AND1]]
+;
+entry:
+  %and1 = and i32 %x, %y
+  br i1 %cond, label %if.true, label %if.false
+if.true:
+  %and2 = and i32 %and1, %z
+  ret i32 %and2
+if.false:
+  ret i32 %and1
+}

>From 1fee55f4e5394369b5df1f139fd056ceb12917b2 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Thu, 14 Aug 2025 10:55:08 -0700
Subject: [PATCH 12/25] correctly checking for vars in same bb in
 extractThreeVariables()

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 1e8c0715ab502..a37660ab23c1a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -106,9 +106,6 @@ extractThreeVariables(Value *Root) {
   SmallVector<Value *> Worklist;
   Worklist.push_back(Root);
 
-  // Track all instructions to ensure they're in the same BB
-  BasicBlock *FirstBB = nullptr;
-
   while (!Worklist.empty()) {
     Value *V = Worklist.pop_back_val();
 
@@ -126,12 +123,6 @@ extractThreeVariables(Value *Root) {
       if (!BO->isBitwiseLogicOp())
         return {nullptr, nullptr, nullptr};
 
-      // Check BB consistency
-      if (!FirstBB)
-        FirstBB = BO->getParent();
-      else if (BO->getParent() != FirstBB)
-        return {nullptr, nullptr, nullptr};
-
       if (V == Root || V->hasOneUse()) {
         Visited.insert(BO->getOperand(0));
         Visited.insert(BO->getOperand(1));
@@ -146,8 +137,20 @@ extractThreeVariables(Value *Root) {
   }
 
   if (Variables.size() == 3) {
-    // Sort variables by instruction order
+    // Check that all instruction variables are in the same BB
     SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
+    BasicBlock *FirstBB = nullptr;
+    for (Value *V : SortedVars) {
+      if (auto *I = dyn_cast<Instruction>(V)) {
+        if (!FirstBB) {
+          FirstBB = I->getParent();
+        } else if (I->getParent() != FirstBB) {
+          return {nullptr, nullptr, nullptr};
+        }
+      }
+    }
+
+    // Sort variables by instruction order
     llvm::sort(SortedVars, [](Value *A, Value *B) {
       if (auto *IA = dyn_cast<Instruction>(A))
         if (auto *IB = dyn_cast<Instruction>(B))

>From a39a3b4c1936c7e0197df7854a91ff7d49721a73 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Fri, 15 Aug 2025 08:55:30 -0700
Subject: [PATCH 13/25] reuse visited set in extractThreeVariables

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 67 ++++++++-----------
 1 file changed, 29 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a37660ab23c1a..30163641d0f68 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -98,20 +98,25 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
   return Result;
 }
 
-static std::tuple<Value *, Value *, Value *>
-extractThreeVariables(Value *Root) {
+static std::tuple<Value *, Value *, Value *, SmallVector<Instruction *>>
+extractThreeVariablesAndInstructions(Value *Root) {
   SmallPtrSet<Value *, 3> Variables;
   SmallPtrSet<Value *, 32> Visited; // Prevent hanging during loop unrolling
                                     // (see bitreverse-hang.ll)
+  SmallVector<Instruction *> Instructions;
   SmallVector<Value *> Worklist;
   Worklist.push_back(Root);
 
   while (!Worklist.empty()) {
     Value *V = Worklist.pop_back_val();
 
+    if (!Visited.insert(V).second)
+      continue;
+
     Value *NotV;
     if (match(V, m_Not(m_Value(NotV)))) {
-      Visited.insert(NotV);
+      if (auto *I = dyn_cast<Instruction>(V))
+        Instructions.push_back(I);
       if (V == Root ||
           V->hasOneUse()) { // Due to lack of cost-based heuristic, only
                             // traverse if it belongs to this expression tree
@@ -121,16 +126,16 @@ extractThreeVariables(Value *Root) {
     }
     if (auto *BO = dyn_cast<BinaryOperator>(V)) {
       if (!BO->isBitwiseLogicOp())
-        return {nullptr, nullptr, nullptr};
+        return {nullptr, nullptr, nullptr, {}};
+
+      Instructions.push_back(BO);
 
       if (V == Root || V->hasOneUse()) {
-        Visited.insert(BO->getOperand(0));
-        Visited.insert(BO->getOperand(1));
         Worklist.push_back(BO->getOperand(0));
         Worklist.push_back(BO->getOperand(1));
       }
     } else if (isa<Argument>(V) || isa<Instruction>(V)) {
-      if (!isa<Constant>(V) && V != Root) {
+      if (V != Root) {
         Variables.insert(V);
       }
     }
@@ -145,7 +150,7 @@ extractThreeVariables(Value *Root) {
         if (!FirstBB) {
           FirstBB = I->getParent();
         } else if (I->getParent() != FirstBB) {
-          return {nullptr, nullptr, nullptr};
+          return {nullptr, nullptr, nullptr, {}};
         }
       }
     }
@@ -157,35 +162,22 @@ extractThreeVariables(Value *Root) {
           return IA->comesBefore(IB);
       return A < B;
     });
-    return {SortedVars[0], SortedVars[1], SortedVars[2]};
+
+    // Sort instructions within the same BB
+    llvm::sort(Instructions, [](Instruction *A, Instruction *B) {
+      return A->comesBefore(B);
+    });
+
+    return {SortedVars[0], SortedVars[1], SortedVars[2],
+            std::move(Instructions)};
   }
-  return {nullptr, nullptr, nullptr};
+  return {nullptr, nullptr, nullptr, {}};
 }
 
 /// Evaluate a boolean expression with bit-vector inputs for all 8 combinations.
 static std::optional<std::bitset<8>>
-evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2) {
-  // Post-order traversal of the expression tree
-  SmallVector<Instruction *> Instructions;
-  SmallVector<Value *> ToVisit;
-  SmallPtrSet<Instruction *, 8> Seen;
-
-  ToVisit.push_back(Expr);
-  while (!ToVisit.empty()) {
-    Value *V = ToVisit.pop_back_val();
-    if (auto *I = dyn_cast<Instruction>(V)) {
-      if (Seen.insert(I).second) {
-        Instructions.push_back(I);
-        for (Value *Op : I->operands()) {
-          ToVisit.push_back(Op);
-        }
-      }
-    }
-  }
-
-  // Sort instructions within the same BB
-  llvm::sort(Instructions,
-             [](Instruction *A, Instruction *B) { return A->comesBefore(B); });
+evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2,
+                          const SmallVector<Instruction *> &Instructions) {
 
   // Initialize bit-vector values for the 3 variables
   // Op0: 0b11110000 (true for combinations 000,001,010,011)
@@ -221,14 +213,12 @@ evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2) {
         Computed[I] = LHS ^ RHS; // Bitwise XOR
         break;
       default:
-        return std::nullopt;
+        llvm_unreachable("Unexpected opcode in boolean expression evaluation");
       }
     }
   }
 
-  auto It = Computed.find(Expr);
-  return It != Computed.end() ? std::optional<std::bitset<8>>(It->second)
-                              : std::nullopt;
+  return std::bitset<8>(Computed.at(Expr));
 }
 
 /// Try to canonicalize 3-variable boolean expressions using truth table lookup.
@@ -242,11 +232,12 @@ static Value *foldThreeVarBoolExpr(Instruction &Root,
       !isa<BinaryOperator>(BO.getOperand(1)))
     return nullptr;
 
-  auto [Op0, Op1, Op2] = extractThreeVariables(&Root);
+  auto [Op0, Op1, Op2, Instructions] =
+      extractThreeVariablesAndInstructions(&Root);
   if (!Op0 || !Op1 || !Op2)
     return nullptr;
 
-  auto Table = evaluateBooleanExpression(&Root, Op0, Op1, Op2);
+  auto Table = evaluateBooleanExpression(&Root, Op0, Op1, Op2, Instructions);
   if (!Table)
     return nullptr;
 

>From 23feb15170236cad7067bd57228b6fc5569e0263 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Fri, 15 Aug 2025 11:44:33 -0700
Subject: [PATCH 14/25] multi-use tests + negative tests with and/or Var, Const
 nodes

---
 llvm/test/Transforms/InstCombine/pr97044.ll | 142 +++++++++++++++++++-
 1 file changed, 136 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
index b94662f4135d3..c5121c96a0c7b 100644
--- a/llvm/test/Transforms/InstCombine/pr97044.ll
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -84,8 +84,11 @@ define i32 @test3_already_optimal(i32 %x, i32 %y, i32 %z) {
   %not = xor i32 %xor, -1
   ret i32 %not
 }
-; Negative Tests
-; Test with non-bitwise operation (should not transform - add/sub not supported)
+
+; ==============================
+;       Negative Tests
+; ==============================
+
 define i32 @negative_non_bitwise_add(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @negative_non_bitwise_add(
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
@@ -96,7 +99,6 @@ define i32 @negative_non_bitwise_add(i32 %x, i32 %y, i32 %z) {
   %add2 = add i32 %add1, %z
   ret i32 %add2
 }
-; Test with only 2 variables (should not transform - needs exactly 3 variables)
 define i32 @negative_two_variables(i32 %x, i32 %y) {
 ; CHECK-LABEL: @negative_two_variables(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
@@ -107,7 +109,6 @@ define i32 @negative_two_variables(i32 %x, i32 %y) {
   %not = xor i32 %and, -1
   ret i32 %not
 }
-; Test with 4 variables (should not transform - needs exactly 3 variables)
 define i32 @negative_four_variables(i32 %x, i32 %y, i32 %z, i32 %w) {
 ; CHECK-LABEL: @negative_four_variables(
 ; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
@@ -120,7 +121,6 @@ define i32 @negative_four_variables(i32 %x, i32 %y, i32 %z, i32 %w) {
   %or = or i32 %and1, %and2
   ret i32 %or
 }
-; Test with simple 2-level expression (should not transform - not complex enough)
 define i32 @negative_simple_expression(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @negative_simple_expression(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
@@ -131,7 +131,6 @@ define i32 @negative_simple_expression(i32 %x, i32 %y, i32 %z) {
   %or = or i32 %and, %z
   ret i32 %or
 }
-; Test with instructions in different basic blocks (should not transform)
 define i32 @negative_different_basic_blocks(i32 %x, i32 %y, i32 %z, i1 %cond) {
 ; CHECK-LABEL: @negative_different_basic_blocks(
 ; CHECK-NEXT:  entry:
@@ -152,3 +151,134 @@ if.true:
 if.false:
   ret i32 %and1
 }
+define i32 @negative_two_vars_one_const(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_two_vars_one_const(
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND1]], 42
+; CHECK-NEXT:    [[AND3_DEMORGAN:%.*]] = or i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[AND3:%.*]] = xor i32 [[AND3_DEMORGAN]], -1
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND2]], [[AND3]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and1 = and i32 %x, %y
+  %and2 = and i32 %and1, 42
+  %not_x = xor i32 %x, -1
+  %not_y = xor i32 %y, -1
+  %and3 = and i32 %not_x, %not_y
+  %or = or i32 %and2, %and3
+  ret i32 %or
+}
+
+define i32 @negative_one_var_two_consts(i32 %x) {
+; CHECK-LABEL: @negative_one_var_two_consts(
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[NOT_X:%.*]] = and i32 [[X]], 3
+; CHECK-NEXT:    [[AND3:%.*]] = xor i32 [[NOT_X]], 3
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND2]], [[AND3]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and1 = and i32 %x, 15
+  %and2 = and i32 %and1, 7
+  %not_x = xor i32 %x, -1
+  %and3 = and i32 %not_x, 3
+  %or = or i32 %and2, %and3
+  ret i32 %or
+}
+
+define i32 @negative_const_pattern_match(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_const_pattern_match(
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[Y:%.*]], 255
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR]], [[X:%.*]]
+; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[XOR]], -1
+; CHECK-NEXT:    ret i32 [[NOT]]
+;
+  %or = or i32 %y, 255
+  %xor = xor i32 %or, %x
+  %not = xor i32 %xor, -1
+  ret i32 %not
+}
+
+define i32 @negative_mixed_vars_consts(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_mixed_vars_consts(
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND1]], 96
+; CHECK-NEXT:    [[NOT_X:%.*]] = and i32 [[X]], 170
+; CHECK-NEXT:    [[AND3:%.*]] = xor i32 [[NOT_X]], 170
+; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[AND2]], [[AND3]]
+; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[Y]], 204
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR1]], [[AND4]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %and1 = and i32 %x, %y
+  %and2 = and i32 %and1, 96
+  %not_x = xor i32 %x, -1
+  %and3 = and i32 %not_x, 170
+  %or1 = or i32 %and2, %and3
+  %and4 = and i32 %y, 204
+  %xor = xor i32 %or1, %and4
+  ret i32 %xor
+}
+
+define i32 @negative_const_blocks_extraction(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_const_blocks_extraction(
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[X:%.*]], 42
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[AND1]], [[AND2]]
+; CHECK-NEXT:    [[AND3:%.*]] = and i32 [[X]], 24
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR1]], [[AND3]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %and1 = and i32 %x, 42
+  %and2 = and i32 %y, %z
+  %or1 = or i32 %and1, %and2
+  %and3 = and i32 %x, 24
+  %xor = xor i32 %or1, %and3
+  ret i32 %xor
+}
+
+; ==============================
+;       Multi-use Tests
+; ==============================
+declare void @use(i32)
+define i32 @multi_use_not(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @multi_use_not(
+; CHECK-NEXT:    [[NOT1:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT:    call void @use(i32 [[NOT1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[TMP1]], [[NOT1]]
+; CHECK-NEXT:    ret i32 [[AND2]]
+;
+  %not1 = xor i32 %x, -1
+  call void @use(i32 %not1)
+  %and1 = and i32 %not1, %y
+  %and2 = and i32 %and1, %z
+  ret i32 %and2
+}
+define i32 @multi_use_binop(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @multi_use_binop(
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use(i32 [[AND1]])
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[AND2]]
+;
+  %and1 = and i32 %x, %y
+  call void @use(i32 %and1)
+  %and2 = and i32 %and1, %z
+  ret i32 %and2
+}
+define i32 @multi_use_multiple(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @multi_use_multiple(
+; CHECK-NEXT:    [[NOT1:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[AND1:%.*]] = and i32 [[Y:%.*]], [[NOT1]]
+; CHECK-NEXT:    call void @use(i32 [[NOT1]])
+; CHECK-NEXT:    call void @use(i32 [[AND1]])
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[AND2]]
+;
+  %not1 = xor i32 %x, -1
+  %and1 = and i32 %not1, %y
+  call void @use(i32 %not1)
+  call void @use(i32 %and1)
+  %and2 = and i32 %and1, %z
+  ret i32 %and2
+}
\ No newline at end of file

>From 1a94bba9746082ec6089085d3100675bc0b4a011 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Fri, 15 Aug 2025 11:47:38 -0700
Subject: [PATCH 15/25] Computed Map validation in extractThreeVar

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 26 ++++++++++++++-----
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 30163641d0f68..e3e85fbcd1bbf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -155,6 +155,26 @@ extractThreeVariablesAndInstructions(Value *Root) {
       }
     }
 
+    // Validation that all collected instructions have operands that will be in
+    // Computed map
+    SmallPtrSet<Value *, 32> ValidOperands(Variables.begin(), Variables.end());
+    ValidOperands.insert(Instructions.begin(), Instructions.end());
+
+    for (Instruction *I : Instructions) {
+      Value *NotV;
+      if (match(I, m_Not(m_Value(NotV)))) {
+        // For NOT operations, only check the variable operand (constant -1 is
+        // handled by pattern matcher)
+        if (!ValidOperands.count(NotV))
+          return {nullptr, nullptr, nullptr, {}};
+      } else {
+        for (Use &U : I->operands()) {
+          if (!ValidOperands.count(U.get()))
+            return {nullptr, nullptr, nullptr, {}};
+        }
+      }
+    }
+
     // Sort variables by instruction order
     llvm::sort(SortedVars, [](Value *A, Value *B) {
       if (auto *IA = dyn_cast<Instruction>(A))
@@ -191,14 +211,8 @@ evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2,
   for (Instruction *I : Instructions) {
     Value *NotV;
     if (match(I, m_Not(m_Value(NotV)))) {
-      if (!Computed.count(NotV))
-        return std::nullopt;
       Computed[I] = ~Computed.at(NotV); // Bitwise NOT
     } else if (auto *BO = dyn_cast<BinaryOperator>(I)) {
-      if (!Computed.count(BO->getOperand(0)) ||
-          !Computed.count(BO->getOperand(1)))
-        return std::nullopt;
-
       auto &LHS = Computed.at(BO->getOperand(0));
       auto &RHS = Computed.at(BO->getOperand(1));
 

>From d19190df11cbc5a1323979975886bfdbe93e36d6 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Tue, 19 Aug 2025 06:33:29 -0700
Subject: [PATCH 16/25] Pass instructions by reference instead of returning
 vectors

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index e3e85fbcd1bbf..e9634bb35df87 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -98,12 +98,12 @@ static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
   return Result;
 }
 
-static std::tuple<Value *, Value *, Value *, SmallVector<Instruction *>>
-extractThreeVariablesAndInstructions(Value *Root) {
+static std::tuple<Value *, Value *, Value *>
+extractThreeVariablesAndInstructions(
+    Value *Root, SmallVectorImpl<Instruction *> &Instructions) {
   SmallPtrSet<Value *, 3> Variables;
   SmallPtrSet<Value *, 32> Visited; // Prevent hanging during loop unrolling
                                     // (see bitreverse-hang.ll)
-  SmallVector<Instruction *> Instructions;
   SmallVector<Value *> Worklist;
   Worklist.push_back(Root);
 
@@ -126,7 +126,7 @@ extractThreeVariablesAndInstructions(Value *Root) {
     }
     if (auto *BO = dyn_cast<BinaryOperator>(V)) {
       if (!BO->isBitwiseLogicOp())
-        return {nullptr, nullptr, nullptr, {}};
+        return {nullptr, nullptr, nullptr};
 
       Instructions.push_back(BO);
 
@@ -150,7 +150,7 @@ extractThreeVariablesAndInstructions(Value *Root) {
         if (!FirstBB) {
           FirstBB = I->getParent();
         } else if (I->getParent() != FirstBB) {
-          return {nullptr, nullptr, nullptr, {}};
+          return {nullptr, nullptr, nullptr};
         }
       }
     }
@@ -166,11 +166,11 @@ extractThreeVariablesAndInstructions(Value *Root) {
         // For NOT operations, only check the variable operand (constant -1 is
         // handled by pattern matcher)
         if (!ValidOperands.count(NotV))
-          return {nullptr, nullptr, nullptr, {}};
+          return {nullptr, nullptr, nullptr};
       } else {
         for (Use &U : I->operands()) {
           if (!ValidOperands.count(U.get()))
-            return {nullptr, nullptr, nullptr, {}};
+            return {nullptr, nullptr, nullptr};
         }
       }
     }
@@ -188,10 +188,9 @@ extractThreeVariablesAndInstructions(Value *Root) {
       return A->comesBefore(B);
     });
 
-    return {SortedVars[0], SortedVars[1], SortedVars[2],
-            std::move(Instructions)};
+    return {SortedVars[0], SortedVars[1], SortedVars[2]};
   }
-  return {nullptr, nullptr, nullptr, {}};
+  return {nullptr, nullptr, nullptr};
 }
 
 /// Evaluate a boolean expression with bit-vector inputs for all 8 combinations.
@@ -246,8 +245,9 @@ static Value *foldThreeVarBoolExpr(Instruction &Root,
       !isa<BinaryOperator>(BO.getOperand(1)))
     return nullptr;
 
-  auto [Op0, Op1, Op2, Instructions] =
-      extractThreeVariablesAndInstructions(&Root);
+  SmallVector<Instruction *> Instructions;
+  auto [Op0, Op1, Op2] =
+      extractThreeVariablesAndInstructions(&Root, Instructions);
   if (!Op0 || !Op1 || !Op2)
     return nullptr;
 

>From 9296d9b978ccc3ee7f27677a85362d37afff0355 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Wed, 20 Aug 2025 08:53:42 -0700
Subject: [PATCH 17/25] early check for invalid num of variables

---
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index e9634bb35df87..a80099c1b9939 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -141,7 +141,9 @@ extractThreeVariablesAndInstructions(
     }
   }
 
-  if (Variables.size() == 3) {
+  if (Variables.size() != 3) {
+    return {nullptr, nullptr, nullptr};
+  }
     // Check that all instruction variables are in the same BB
     SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
     BasicBlock *FirstBB = nullptr;
@@ -189,7 +191,7 @@ extractThreeVariablesAndInstructions(
     });
 
     return {SortedVars[0], SortedVars[1], SortedVars[2]};
-  }
+
   return {nullptr, nullptr, nullptr};
 }
 

>From 48bd1cac5c5e6977cef71c8a34ac91c56dcc5a21 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Wed, 20 Aug 2025 09:35:44 -0700
Subject: [PATCH 18/25] Improved sorting

---
 .../Transforms/InstCombine/InstCombineAndOrXor.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a80099c1b9939..7cb7d8ef81ccb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -177,12 +177,16 @@ extractThreeVariablesAndInstructions(
       }
     }
 
-    // Sort variables by instruction order
+    // Sort variables by argNo if both are arguments, otherwise args before
+    // instructions
     llvm::sort(SortedVars, [](Value *A, Value *B) {
-      if (auto *IA = dyn_cast<Instruction>(A))
-        if (auto *IB = dyn_cast<Instruction>(B))
-          return IA->comesBefore(IB);
-      return A < B;
+      if (isa<Argument>(A) != isa<Argument>(B))
+        return isa<Argument>(A);
+
+      if (isa<Argument>(A))
+        return cast<Argument>(A)->getArgNo() < cast<Argument>(B)->getArgNo();
+
+      return cast<Instruction>(A)->comesBefore(cast<Instruction>(B));
     });
 
     // Sort instructions within the same BB

>From 464d95eafc30dc3a4a04b4152469c9ddb0c4e730 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Wed, 20 Aug 2025 11:02:18 -0700
Subject: [PATCH 19/25] treat non-bitwise ops as leaf nodes with use-count
 heuristic

---
 .../InstCombine/InstCombineAndOrXor.cpp       |  7 +-
 llvm/test/Transforms/InstCombine/pr97044.ll   | 91 +++++++++++++++++++
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7cb7d8ef81ccb..8e373dfec742b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -125,8 +125,13 @@ extractThreeVariablesAndInstructions(
       continue;
     }
     if (auto *BO = dyn_cast<BinaryOperator>(V)) {
-      if (!BO->isBitwiseLogicOp())
+      if (!BO->isBitwiseLogicOp()) {
+        if (V != Root && !V->hasOneUse()) {
+          Variables.insert(V);
+          continue;
+        }
         return {nullptr, nullptr, nullptr};
+      }
 
       Instructions.push_back(BO);
 
diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
index c5121c96a0c7b..79dc19dfe905b 100644
--- a/llvm/test/Transforms/InstCombine/pr97044.ll
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -85,6 +85,56 @@ define i32 @test3_already_optimal(i32 %x, i32 %y, i32 %z) {
   ret i32 %not
 }
 
+define i32 @test_add_as_leaf(i32 %x, i32 %y, i32 %c) {
+; CHECK-LABEL: @test_add_as_leaf(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[NOT3:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT:    [[AND4:%.*]] = and i32 [[ADD]], [[NOT3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[Y:%.*]], [[AND4]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], [[ADD]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %add = add i32 %x, %c
+  %not1 = xor i32 %add, -1
+  %and1 = and i32 %not1, %y
+  %not2 = xor i32 %y, -1
+  %and2 = and i32 %add, %not2
+  %or = or i32 %and1, %and2
+  %and3 = and i32 %x, %y
+  %not3 = xor i32 %x, -1
+  %and4 = and i32 %not3, %add
+  %xor = xor i32 %or, %and4
+  ret i32 %xor
+}
+
+define i32 @test_sub_as_leaf(i32 %a, i32 %b, i32 %offset) {
+; CHECK-LABEL: @test_sub_as_leaf(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[A:%.*]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[B:%.*]], [[SUB]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[A]]
+; CHECK-NEXT:    [[RESULT:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %sub = sub i32 %a, %offset
+  %not1 = xor i32 %sub, -1
+  %and1 = and i32 %not1, %b
+  %and2 = and i32 %and1, %a
+  %not2 = xor i32 %b, -1
+  %and3 = and i32 %a, %not2
+  %and4 = and i32 %and3, %sub
+  %or = or i32 %and2, %and4
+  %not3 = xor i32 %a, -1
+  %not4 = xor i32 %b, -1
+  %and5 = and i32 %not3, %not4
+  %not5 = xor i32 %sub, -1
+  %and6 = and i32 %and5, %not5
+  %or2 = or i32 %or, %and6
+  %and7 = and i32 %a, %b
+  %and8 = and i32 %and7, %sub
+  %result = or i32 %or2, %and8
+  ret i32 %result
+}
+
 ; ==============================
 ;       Negative Tests
 ; ==============================
@@ -236,6 +286,17 @@ define i32 @negative_const_blocks_extraction(i32 %x, i32 %y, i32 %z) {
   ret i32 %xor
 }
 
+define i32 @negative_single_use_add(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_single_use_add(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %add = add i32 %x, %y     ; Single-use non-bitwise op
+  %and = and i32 %add, %z   ; Only 2 variables: %add, %z (should not optimize)
+  ret i32 %and
+}
+
 ; ==============================
 ;       Multi-use Tests
 ; ==============================
@@ -281,4 +342,34 @@ define i32 @multi_use_multiple(i32 %x, i32 %y, i32 %z) {
   call void @use(i32 %and1)
   %and2 = and i32 %and1, %z
   ret i32 %and2
+}
+
+define i32 @multi_use_add_as_variable(i32 %x, i32 %y, i32 %offset) {
+; CHECK-LABEL: @multi_use_add_as_variable(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[OFFSET:%.*]]
+; CHECK-NEXT:    call void @use(i32 [[ADD]])
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[Y:%.*]], [[ADD]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[RESULT:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+  %add = add i32 %x, %offset    ; Multi-use non-bitwise op  
+  call void @use(i32 %add)      ; Extra use
+  %not1 = xor i32 %add, -1
+  %and1 = and i32 %not1, %y
+  %and2 = and i32 %and1, %x
+  %not2 = xor i32 %y, -1
+  %and3 = and i32 %x, %not2
+  %and4 = and i32 %and3, %add
+  %or = or i32 %and2, %and4
+  %not3 = xor i32 %x, -1
+  %not4 = xor i32 %y, -1
+  %and5 = and i32 %not3, %not4
+  %not5 = xor i32 %add, -1
+  %and6 = and i32 %and5, %not5
+  %or2 = or i32 %or, %and6
+  %and7 = and i32 %x, %y
+  %and8 = and i32 %and7, %add
+  %result = or i32 %or2, %and8
+  ret i32 %result
 }
\ No newline at end of file

>From 2a905fb9f441d6a0bff80be2b5197c29a243e931 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Wed, 20 Aug 2025 11:11:13 -0700
Subject: [PATCH 20/25] format

---
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 8e373dfec742b..2b807b8ae1512 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -201,7 +201,7 @@ extractThreeVariablesAndInstructions(
 
     return {SortedVars[0], SortedVars[1], SortedVars[2]};
 
-  return {nullptr, nullptr, nullptr};
+    return {nullptr, nullptr, nullptr};
 }
 
 /// Evaluate a boolean expression with bit-vector inputs for all 8 combinations.

>From fc2aac4d423fa8725208303cacc646b3c7a22e65 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Wed, 20 Aug 2025 11:22:29 -0700
Subject: [PATCH 21/25] format-2

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 81 +++++++++----------
 1 file changed, 39 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2b807b8ae1512..b2a2ac81c2c0e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -149,59 +149,56 @@ extractThreeVariablesAndInstructions(
   if (Variables.size() != 3) {
     return {nullptr, nullptr, nullptr};
   }
-    // Check that all instruction variables are in the same BB
-    SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
-    BasicBlock *FirstBB = nullptr;
-    for (Value *V : SortedVars) {
-      if (auto *I = dyn_cast<Instruction>(V)) {
-        if (!FirstBB) {
-          FirstBB = I->getParent();
-        } else if (I->getParent() != FirstBB) {
-          return {nullptr, nullptr, nullptr};
-        }
+  // Check that all instruction variables are in the same BB
+  SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
+  BasicBlock *FirstBB = nullptr;
+  for (Value *V : SortedVars) {
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      if (!FirstBB) {
+        FirstBB = I->getParent();
+      } else if (I->getParent() != FirstBB) {
+        return {nullptr, nullptr, nullptr};
       }
     }
+  }
 
-    // Validation that all collected instructions have operands that will be in
-    // Computed map
-    SmallPtrSet<Value *, 32> ValidOperands(Variables.begin(), Variables.end());
-    ValidOperands.insert(Instructions.begin(), Instructions.end());
-
-    for (Instruction *I : Instructions) {
-      Value *NotV;
-      if (match(I, m_Not(m_Value(NotV)))) {
-        // For NOT operations, only check the variable operand (constant -1 is
-        // handled by pattern matcher)
-        if (!ValidOperands.count(NotV))
+  // Validation that all collected instructions have operands that will be in
+  // Computed map
+  SmallPtrSet<Value *, 32> ValidOperands(Variables.begin(), Variables.end());
+  ValidOperands.insert(Instructions.begin(), Instructions.end());
+
+  for (Instruction *I : Instructions) {
+    Value *NotV;
+    if (match(I, m_Not(m_Value(NotV)))) {
+      // For NOT operations, only check the variable operand (constant -1 is
+      // handled by pattern matcher)
+      if (!ValidOperands.count(NotV))
+        return {nullptr, nullptr, nullptr};
+    } else {
+      for (Use &U : I->operands()) {
+        if (!ValidOperands.count(U.get()))
           return {nullptr, nullptr, nullptr};
-      } else {
-        for (Use &U : I->operands()) {
-          if (!ValidOperands.count(U.get()))
-            return {nullptr, nullptr, nullptr};
-        }
       }
     }
+  }
 
-    // Sort variables by argNo if both are arguments, otherwise args before
-    // instructions
-    llvm::sort(SortedVars, [](Value *A, Value *B) {
-      if (isa<Argument>(A) != isa<Argument>(B))
-        return isa<Argument>(A);
-
-      if (isa<Argument>(A))
-        return cast<Argument>(A)->getArgNo() < cast<Argument>(B)->getArgNo();
+  // Sort variables by argNo if both are arguments, otherwise args before
+  // instructions
+  llvm::sort(SortedVars, [](Value *A, Value *B) {
+    if (isa<Argument>(A) != isa<Argument>(B))
+      return isa<Argument>(A);
 
-      return cast<Instruction>(A)->comesBefore(cast<Instruction>(B));
-    });
+    if (isa<Argument>(A))
+      return cast<Argument>(A)->getArgNo() < cast<Argument>(B)->getArgNo();
 
-    // Sort instructions within the same BB
-    llvm::sort(Instructions, [](Instruction *A, Instruction *B) {
-      return A->comesBefore(B);
-    });
+    return cast<Instruction>(A)->comesBefore(cast<Instruction>(B));
+  });
 
-    return {SortedVars[0], SortedVars[1], SortedVars[2]};
+  // Sort instructions within the same BB
+  llvm::sort(Instructions,
+             [](Instruction *A, Instruction *B) { return A->comesBefore(B); });
 
-    return {nullptr, nullptr, nullptr};
+  return {SortedVars[0], SortedVars[1], SortedVars[2]};
 }
 
 /// Evaluate a boolean expression with bit-vector inputs for all 8 combinations.

>From d557827a7abf83262859b0f0aad306e9c4863dfc Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Tue, 26 Aug 2025 02:41:39 -0700
Subject: [PATCH 22/25] validate no cross-BB instruction order comparison for
 computation instructions

---
 .../Transforms/InstCombine/InstCombineAndOrXor.cpp   | 12 +++++++++++-
 llvm/test/Transforms/InstCombine/pr97044.ll          |  4 ++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b2a2ac81c2c0e..936133eaa3f7e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -149,9 +149,11 @@ extractThreeVariablesAndInstructions(
   if (Variables.size() != 3) {
     return {nullptr, nullptr, nullptr};
   }
-  // Check that all instruction variables are in the same BB
+  // Check that all instructions (both variables and computation instructions)
+  // are in the same BB
   SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
   BasicBlock *FirstBB = nullptr;
+
   for (Value *V : SortedVars) {
     if (auto *I = dyn_cast<Instruction>(V)) {
       if (!FirstBB) {
@@ -162,6 +164,14 @@ extractThreeVariablesAndInstructions(
     }
   }
 
+  for (Instruction *I : Instructions) {
+    if (!FirstBB) {
+      FirstBB = I->getParent();
+    } else if (I->getParent() != FirstBB) {
+      return {nullptr, nullptr, nullptr};
+    }
+  }
+
   // Validation that all collected instructions have operands that will be in
   // Computed map
   SmallPtrSet<Value *, 32> ValidOperands(Variables.begin(), Variables.end());
diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
index 79dc19dfe905b..faa8e3b2cb4ce 100644
--- a/llvm/test/Transforms/InstCombine/pr97044.ll
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -353,7 +353,7 @@ define i32 @multi_use_add_as_variable(i32 %x, i32 %y, i32 %offset) {
 ; CHECK-NEXT:    [[RESULT:%.*]] = xor i32 [[TMP2]], -1
 ; CHECK-NEXT:    ret i32 [[RESULT]]
 ;
-  %add = add i32 %x, %offset    ; Multi-use non-bitwise op  
+  %add = add i32 %x, %offset    ; Multi-use non-bitwise op
   call void @use(i32 %add)      ; Extra use
   %not1 = xor i32 %add, -1
   %and1 = and i32 %not1, %y
@@ -372,4 +372,4 @@ define i32 @multi_use_add_as_variable(i32 %x, i32 %y, i32 %offset) {
   %and8 = and i32 %and7, %add
   %result = or i32 %or2, %and8
   ret i32 %result
-}
\ No newline at end of file
+}

>From 5c40046d882f74d9f7f5f53a2021ca19135e765d Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Fri, 29 Aug 2025 03:17:42 -0700
Subject: [PATCH 23/25] (NFC: Styling) + Structural Similarity Check for loop

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 103 +++++++++---------
 1 file changed, 51 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 936133eaa3f7e..ac190efedd62b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -53,10 +53,7 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
 
 /// This is to create optimal 3-variable boolean logic from truth tables.
 /// currently it supports the cases pertaining to the issue 97044. More cases
-/// can be added based on real-world justification for specific 3 input cases
-///  or with reviewer approval all 256 cases can be added (choose the
-///  canonicalizations found
-/// in x86InstCombine.cpp?)
+/// can be added based on real-world justification for specific 3 input cases.
 static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
                                        Value *Op1, Value *Op2, Value *Root,
                                        IRBuilderBase &Builder) {
@@ -102,8 +99,7 @@ static std::tuple<Value *, Value *, Value *>
 extractThreeVariablesAndInstructions(
     Value *Root, SmallVectorImpl<Instruction *> &Instructions) {
   SmallPtrSet<Value *, 3> Variables;
-  SmallPtrSet<Value *, 32> Visited; // Prevent hanging during loop unrolling
-                                    // (see bitreverse-hang.ll)
+  SmallPtrSet<Value *, 32> Visited;
   SmallVector<Value *> Worklist;
   Worklist.push_back(Root);
 
@@ -113,87 +109,80 @@ extractThreeVariablesAndInstructions(
     if (!Visited.insert(V).second)
       continue;
 
-    Value *NotV;
-    if (match(V, m_Not(m_Value(NotV)))) {
+    // Due to lack of cost-based heuristic, only traverse if it belongs to this
+    // expression tree.
+    bool ShouldTraverse = (V == Root || V->hasOneUse());
+
+    if (Value *NotV; match(V, m_Not(m_Value(NotV)))) {
       if (auto *I = dyn_cast<Instruction>(V))
         Instructions.push_back(I);
-      if (V == Root ||
-          V->hasOneUse()) { // Due to lack of cost-based heuristic, only
-                            // traverse if it belongs to this expression tree
+      if (ShouldTraverse)
         Worklist.push_back(NotV);
-      }
       continue;
     }
     if (auto *BO = dyn_cast<BinaryOperator>(V)) {
       if (!BO->isBitwiseLogicOp()) {
-        if (V != Root && !V->hasOneUse()) {
-          Variables.insert(V);
-          continue;
-        }
-        return {nullptr, nullptr, nullptr};
+        if (V == Root)
+          return {nullptr, nullptr, nullptr};
+        Variables.insert(V);
+        continue;
       }
 
       Instructions.push_back(BO);
 
-      if (V == Root || V->hasOneUse()) {
+      if (ShouldTraverse) {
         Worklist.push_back(BO->getOperand(0));
         Worklist.push_back(BO->getOperand(1));
       }
-    } else if (isa<Argument>(V) || isa<Instruction>(V)) {
-      if (V != Root) {
-        Variables.insert(V);
-      }
+    } else if ((isa<Argument>(V) || isa<Instruction>(V)) && V != Root) {
+      Variables.insert(V);
     }
   }
 
-  if (Variables.size() != 3) {
+  if (Variables.size() != 3)
     return {nullptr, nullptr, nullptr};
-  }
   // Check that all instructions (both variables and computation instructions)
-  // are in the same BB
+  // are in the same BB.
   SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
   BasicBlock *FirstBB = nullptr;
 
-  for (Value *V : SortedVars) {
-    if (auto *I = dyn_cast<Instruction>(V)) {
-      if (!FirstBB) {
-        FirstBB = I->getParent();
-      } else if (I->getParent() != FirstBB) {
-        return {nullptr, nullptr, nullptr};
-      }
-    }
-  }
-
-  for (Instruction *I : Instructions) {
-    if (!FirstBB) {
+  auto CheckSameBB = [&FirstBB](Instruction *I) -> bool {
+    if (!FirstBB)
       FirstBB = I->getParent();
-    } else if (I->getParent() != FirstBB) {
+    else if (I->getParent() != FirstBB)
+      return false;
+    return true;
+  };
+
+  for (Value *V : SortedVars)
+    if (auto *I = dyn_cast<Instruction>(V); I && !CheckSameBB(I))
+      return {nullptr, nullptr, nullptr};
+
+  for (Instruction *I : Instructions)
+    if (!CheckSameBB(I))
       return {nullptr, nullptr, nullptr};
-    }
-  }
 
   // Validation that all collected instructions have operands that will be in
-  // Computed map
+  // Computed map.
   SmallPtrSet<Value *, 32> ValidOperands(Variables.begin(), Variables.end());
   ValidOperands.insert(Instructions.begin(), Instructions.end());
 
   for (Instruction *I : Instructions) {
     Value *NotV;
-    if (match(I, m_Not(m_Value(NotV)))) {
-      // For NOT operations, only check the variable operand (constant -1 is
-      // handled by pattern matcher)
-      if (!ValidOperands.count(NotV))
-        return {nullptr, nullptr, nullptr};
-    } else {
+    bool IsNot = match(I, m_Not(m_Value(NotV)));
+
+    if (!IsNot) {
       for (Use &U : I->operands()) {
         if (!ValidOperands.count(U.get()))
           return {nullptr, nullptr, nullptr};
       }
+    } else if (!ValidOperands.count(NotV)) {
+      // For NOT: only check the variable operand (constant -1 is handled by
+      // pattern matcher).
+      return {nullptr, nullptr, nullptr};
     }
   }
 
-  // Sort variables by argNo if both are arguments, otherwise args before
-  // instructions
   llvm::sort(SortedVars, [](Value *A, Value *B) {
     if (isa<Argument>(A) != isa<Argument>(B))
       return isa<Argument>(A);
@@ -204,7 +193,7 @@ extractThreeVariablesAndInstructions(
     return cast<Instruction>(A)->comesBefore(cast<Instruction>(B));
   });
 
-  // Sort instructions within the same BB
+  // Sort instructions (Useful until all 256 cases are added).
   llvm::sort(Instructions,
              [](Instruction *A, Instruction *B) { return A->comesBefore(B); });
 
@@ -216,7 +205,7 @@ static std::optional<std::bitset<8>>
 evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2,
                           const SmallVector<Instruction *> &Instructions) {
 
-  // Initialize bit-vector values for the 3 variables
+  // Initialize bit-vector values for the 3 variables as:
   // Op0: 0b11110000 (true for combinations 000,001,010,011)
   // Op1: 0b11001100 (true for combinations 000,001,100,101)
   // Op2: 0b10101010 (true for combinations 000,010,100,110)
@@ -252,7 +241,8 @@ evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2,
   return std::bitset<8>(Computed.at(Expr));
 }
 
-/// Try to canonicalize 3-variable boolean expressions using truth table lookup.
+// Entry point for the 3-variable boolean expression folding. Handles early
+// returns and checks for infinite cycles.
 static Value *foldThreeVarBoolExpr(Instruction &Root,
                                    InstCombiner::BuilderTy &Builder) {
 
@@ -273,6 +263,15 @@ static Value *foldThreeVarBoolExpr(Instruction &Root,
   if (!Table)
     return nullptr;
 
+  // Prevent infinite cycles by checking for structurally similar instructions:
+  // early return if extracted variables overlap with root operands.
+  auto *RootBO = cast<BinaryOperator>(&Root);
+  for (unsigned i = 0; i < RootBO->getNumOperands(); ++i) {
+    Value *RootOp = RootBO->getOperand(i);
+    if (RootOp == Op0 || RootOp == Op1 || RootOp == Op2)
+      return nullptr;
+  }
+
   return createLogicFromTable3Var(*Table, Op0, Op1, Op2, &Root, Builder);
 }
 

>From 7bf8caf950d52af65664c7c7cb86ea6861acb736 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Fri, 29 Aug 2025 05:11:00 -0700
Subject: [PATCH 24/25] Traverse root operands to avoid treating them as leaf
 variables

---
 .../InstCombine/InstCombineAndOrXor.cpp       | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index ac190efedd62b..ef23e5490e8f2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -100,9 +100,16 @@ extractThreeVariablesAndInstructions(
     Value *Root, SmallVectorImpl<Instruction *> &Instructions) {
   SmallPtrSet<Value *, 3> Variables;
   SmallPtrSet<Value *, 32> Visited;
+  SmallPtrSet<Value *, 8> RootOperands;
   SmallVector<Value *> Worklist;
   Worklist.push_back(Root);
 
+  // Record the root's direct operands so that they are never treated as leaf
+  // variables; this prevents infinite cycles.
+  if (auto *RootInst = dyn_cast<Instruction>(Root))
+    for (Use &U : RootInst->operands())
+      RootOperands.insert(U.get());
+
   while (!Worklist.empty()) {
     Value *V = Worklist.pop_back_val();
 
@@ -124,7 +131,8 @@ extractThreeVariablesAndInstructions(
       if (!BO->isBitwiseLogicOp()) {
         if (V == Root)
           return {nullptr, nullptr, nullptr};
-        Variables.insert(V);
+        if (!RootOperands.count(V))
+          Variables.insert(V);
         continue;
       }
 
@@ -135,7 +143,8 @@ extractThreeVariablesAndInstructions(
         Worklist.push_back(BO->getOperand(1));
       }
     } else if ((isa<Argument>(V) || isa<Instruction>(V)) && V != Root) {
-      Variables.insert(V);
+      if (!RootOperands.count(V))
+        Variables.insert(V);
     }
   }
 
@@ -242,7 +251,7 @@ evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2,
 }
 
 // Entry point for the 3-variable boolean expression folding. Handles early
-// returns and checks for infinite cycles.
+// returns.
 static Value *foldThreeVarBoolExpr(Instruction &Root,
                                    InstCombiner::BuilderTy &Builder) {
 
@@ -263,15 +272,6 @@ static Value *foldThreeVarBoolExpr(Instruction &Root,
   if (!Table)
     return nullptr;
 
-  // Prevent infinite cycles by checking for structurally similar instructions:
-  // early return if extracted variables overlap with root operands.
-  auto *RootBO = cast<BinaryOperator>(&Root);
-  for (unsigned i = 0; i < RootBO->getNumOperands(); ++i) {
-    Value *RootOp = RootBO->getOperand(i);
-    if (RootOp == Op0 || RootOp == Op1 || RootOp == Op2)
-      return nullptr;
-  }
-
   return createLogicFromTable3Var(*Table, Op0, Op1, Op2, &Root, Builder);
 }
 

>From abd628d944e286173903fc0a70966bb33f25eb78 Mon Sep 17 00:00:00 2001
From: Yafet Beyene <ybeyene at nvidia.com>
Date: Fri, 29 Aug 2025 05:11:42 -0700
Subject: [PATCH 25/25] NFC: negative test for treating root operands as leaf
 variables

---
 llvm/test/Transforms/InstCombine/pr97044.ll | 23 +++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
index faa8e3b2cb4ce..06b726bc508be 100644
--- a/llvm/test/Transforms/InstCombine/pr97044.ll
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -297,6 +297,29 @@ define i32 @negative_single_use_add(i32 %x, i32 %y, i32 %z) {
   ret i32 %and
 }
 
+define i32 @negative_add_as_root_operand(i32 %x, i32 %y, i32 %c) {
+; CHECK-LABEL: @negative_add_as_root_operand(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %add = add i32 %x, %c
+  %and = and i32 %add, %y   ; Root: %add is direct operand
+  ret i32 %and
+}
+
+define i32 @negative_sub_as_root_operand(i32 %a, i32 %b, i32 %offset) {
+; CHECK-LABEL: @negative_sub_as_root_operand(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[A:%.*]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[SUB]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+; Similar test with SUB as root operand
+  %sub = sub i32 %a, %offset 
+  %xor = xor i32 %sub, %b     ; Root: %sub is direct operand
+  ret i32 %xor
+}
+
 ; ==============================
 ;       Multi-use Tests
 ; ==============================



More information about the llvm-commits mailing list