[llvm] [InstCombine] Canonicalize complex boolean expressions into ~((y | z) ^ x) via 3-input truth table (PR #149530)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 22 07:21:29 PDT 2025


================
@@ -50,6 +51,242 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
   return Builder.CreateFCmpFMF(NewPred, LHS, RHS, FMF);
 }
 
+/// Build IR for a 3-input boolean function given as an 8-bit truth table; bit
+/// (Op0 << 2 | Op1 << 1 | Op2) of Table holds the result for that input (this
+/// indexing is what the cases below imply). Only the listed tables are handled;
+/// any other table yields nullptr. Root is not used in this function.
+static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
+                                       Value *Op1, Value *Op2, Value *Root,
+                                       IRBuilderBase &Builder) {
+  uint8_t TruthValue = Table.to_ulong(); // All 8 table bits fit in a uint8_t.
+  auto FoldConstant = [&](bool Val) { // True/false constant of Op0's type.
+    Type *Ty = Op0->getType();
+    return Val ? ConstantInt::getTrue(Ty) : ConstantInt::getFalse(Ty);
+  };
+
+  Value *Result = nullptr;
+  switch (TruthValue) {
+  default: // Unsupported table; more cases can be added when justified.
+    return nullptr;
+  case 0x00: // Always FALSE
+    Result = FoldConstant(false);
+    break;
+  case 0xFF: // Always TRUE
+    Result = FoldConstant(true);
+    break;
+  case 0xE1: // ~((Op1 | Op2) ^ Op0): true for Op0Op1Op2 = 000, 101, 110, 111
+  {
+    Value *Or = Builder.CreateOr(Op1, Op2);
+    Value *Xor = Builder.CreateXor(Or, Op0);
+    Result = Builder.CreateNot(Xor);
+  } break;
+  case 0x60: // Op0 & (Op1 ^ Op2): true for Op0Op1Op2 = 101, 110
+  {
+    Value *Xor = Builder.CreateXor(Op1, Op2);
+    Result = Builder.CreateAnd(Op0, Xor);
+  } break;
+  case 0xD2: // ((Op1 | Op2) ^ Op0) ^ Op1: true for Op0Op1Op2 = 001, 100, 110, 111
+  {
+    Value *Or = Builder.CreateOr(Op1, Op2);
+    Value *Xor1 = Builder.CreateXor(Or, Op0);
+    Result = Builder.CreateXor(Xor1, Op1);
+  } break;
+  }
+
+  return Result; // Non-null here: every case that reaches this point set it.
+}
+
+/// Extracts exactly 3 variables for truth table optimization from a boolean
+/// expression tree. Traverses single-use instructions, handles non-bitwise ops
+/// as leaf variables, and validates the expression tree structure before
+/// returning the variables in deterministic order. Returns {nullptr, nullptr,
+/// nullptr} if the pattern doesn't match 3-variable optimization criteria in
+/// order to enable an early return.
+static std::tuple<Value *, Value *, Value *>
+extractThreeVariablesAndInstructions(
+    Value *Root, SmallVectorImpl<Instruction *> &Instructions) {
+  SmallPtrSet<Value *, 3> Variables;
+  SmallPtrSet<Value *, 32> Visited;
+  SmallPtrSet<Value *, 8> RootOperands;
+  SmallVector<Value *> Worklist;
+  Worklist.push_back(Root);
+
+  // Traverse root operands to avoid treating them as leaf variables to prevent
+  // infinite cycles.
+  if (auto *RootInst = dyn_cast<Instruction>(Root))
+    for (Use &U : RootInst->operands())
+      RootOperands.insert(U.get());
+
+  while (!Worklist.empty()) {
+    Value *V = Worklist.pop_back_val();
+
+    if (!Visited.insert(V).second)
+      continue;
+
+    // Due to lack of cost-based heuristic, only traverse if it belongs to this
+    // expression tree.
+    bool ShouldTraverse = (V == Root || V->hasOneUse());
+
+    if (Value *NotV; match(V, m_Not(m_Value(NotV)))) {
+      if (auto *I = dyn_cast<Instruction>(V))
+        Instructions.push_back(I);
+      if (ShouldTraverse)
+        Worklist.push_back(NotV);
+      continue;
+    }
+    if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+      if (!BO->isBitwiseLogicOp()) {
+        if (V == Root)
+          return {nullptr, nullptr, nullptr};
+        if (!RootOperands.count(V))
+          Variables.insert(V);
+        continue;
+      }
+
+      Instructions.push_back(BO);
+
+      if (ShouldTraverse) {
+        Worklist.push_back(BO->getOperand(0));
+        Worklist.push_back(BO->getOperand(1));
+      }
+    } else if ((isa<Argument>(V) || isa<Instruction>(V)) && V != Root) {
+      if (!RootOperands.count(V))
+        Variables.insert(V);
+    }
+  }
+
+  if (Variables.size() != 3)
+    return {nullptr, nullptr, nullptr};
+  // Check that all instructions (both variables and computation instructions)
+  // are in the same BB.
+  SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
+  BasicBlock *FirstBB = nullptr;
+
+  auto CheckSameBB = [&FirstBB](Instruction *I) -> bool {
+    if (!FirstBB)
+      FirstBB = I->getParent();
+    else if (I->getParent() != FirstBB)
+      return false;
+    return true;
+  };
+
+  for (Value *V : SortedVars)
+    if (auto *I = dyn_cast<Instruction>(V); I && !CheckSameBB(I))
+      return {nullptr, nullptr, nullptr};
+
+  for (Instruction *I : Instructions)
+    if (!CheckSameBB(I))
+      return {nullptr, nullptr, nullptr};
+
+  // Validation that all collected instructions have operands that will be in
+  // Computed map.
+  SmallPtrSet<Value *, 32> ValidOperands(Variables.begin(), Variables.end());
+  ValidOperands.insert(Instructions.begin(), Instructions.end());
+
+  for (Instruction *I : Instructions) {
+    Value *NotV;
+    bool IsNot = match(I, m_Not(m_Value(NotV)));
+
+    if (!IsNot) {
+      for (Use &U : I->operands()) {
+        if (!ValidOperands.count(U.get()))
+          return {nullptr, nullptr, nullptr};
+      }
+    } else if (!ValidOperands.count(NotV)) {
+      // For NOT: only check the variable operand (constant -1 is handled by
+      // pattern matcher).
+      return {nullptr, nullptr, nullptr};
+    }
+  }
----------------
nikic wrote:

Do I understand correctly that what this actually does is discard cases with constant operands, since everything else should already have been handled in the initial loop? If so, can we bail out on constant operands there already?

https://github.com/llvm/llvm-project/pull/149530


More information about the llvm-commits mailing list