[clang] [clang][dataflow] Factor out built-in boolean model into an explicit module. (PR #82950)

Yitzhak Mandelbaum via cfe-commits cfe-commits at lists.llvm.org
Mon Mar 11 10:04:06 PDT 2024


https://github.com/ymand updated https://github.com/llvm/llvm-project/pull/82950

>From 3b20e1823753ab46e3e259d3d8c727dea91ce1d4 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Fri, 8 Mar 2024 15:19:14 +0000
Subject: [PATCH 1/3] [clang][dataflow] Refactor processing of terminator
 element

This patch vastly simplifies the code handling terminators, without changing any
behavior. Additionally, the simplification unblocks our ability to address a
(simple) FIXME in the code to invoke `transferBranch`, even when builtin options
are disabled.
---
 .../TypeErasedDataflowAnalysis.cpp            | 126 +++++-------------
 1 file changed, 35 insertions(+), 91 deletions(-)

diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 939247c047c66e..2d745231fd0758 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <algorithm>
 #include <optional>
 #include <system_error>
 #include <utility>
@@ -33,7 +32,6 @@
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Error.h"
 
@@ -64,88 +62,27 @@ static bool isBackedgeNode(const CFGBlock &B) {
 
 namespace {
 
-// The return type of the visit functions in TerminatorVisitor. The first
-// element represents the terminator expression (that is the conditional
-// expression in case of a path split in the CFG). The second element
-// represents whether the condition was true or false.
-using TerminatorVisitorRetTy = std::pair<const Expr *, bool>;
-
-/// Extends the flow condition of an environment based on a terminator
-/// statement.
+/// Extracts the condition expression.
 class TerminatorVisitor
-    : public ConstStmtVisitor<TerminatorVisitor, TerminatorVisitorRetTy> {
+    : public ConstStmtVisitor<TerminatorVisitor, const Expr *> {
 public:
-  TerminatorVisitor(Environment &Env, int BlockSuccIdx)
-      : Env(Env), BlockSuccIdx(BlockSuccIdx) {}
-
-  TerminatorVisitorRetTy VisitIfStmt(const IfStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitWhileStmt(const WhileStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitDoStmt(const DoStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitForStmt(const ForStmt *S) {
-    auto *Cond = S->getCond();
-    if (Cond != nullptr)
-      return extendFlowCondition(*Cond);
-    return {nullptr, false};
-  }
-
-  TerminatorVisitorRetTy VisitCXXForRangeStmt(const CXXForRangeStmt *) {
+  TerminatorVisitor() = default;
+  const Expr *VisitIfStmt(const IfStmt *S) { return S->getCond(); }
+  const Expr *VisitWhileStmt(const WhileStmt *S) { return S->getCond(); }
+  const Expr *VisitDoStmt(const DoStmt *S) { return S->getCond(); }
+  const Expr *VisitForStmt(const ForStmt *S) { return S->getCond(); }
+  const Expr *VisitCXXForRangeStmt(const CXXForRangeStmt *) {
     // Don't do anything special for CXXForRangeStmt, because the condition
     // (being implicitly generated) isn't visible from the loop body.
-    return {nullptr, false};
+    return nullptr;
   }
-
-  TerminatorVisitorRetTy VisitBinaryOperator(const BinaryOperator *S) {
+  const Expr *VisitBinaryOperator(const BinaryOperator *S) {
     assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
-    auto *LHS = S->getLHS();
-    assert(LHS != nullptr);
-    return extendFlowCondition(*LHS);
+    return S->getLHS();
   }
-
-  TerminatorVisitorRetTy
-  VisitConditionalOperator(const ConditionalOperator *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-private:
-  TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) {
-    auto *Val = Env.get<BoolValue>(Cond);
-    // In transferCFGBlock(), we ensure that we always have a `Value` for the
-    // terminator condition, so assert this.
-    // We consciously assert ourselves instead of asserting via `cast()` so
-    // that we get a more meaningful line number if the assertion fails.
-    assert(Val != nullptr);
-
-    bool ConditionValue = true;
-    // The condition must be inverted for the successor that encompasses the
-    // "else" branch, if such exists.
-    if (BlockSuccIdx == 1) {
-      Val = &Env.makeNot(*Val);
-      ConditionValue = false;
-    }
-
-    Env.assume(Val->formula());
-    return {&Cond, ConditionValue};
+  const Expr *VisitConditionalOperator(const ConditionalOperator *S) {
+    return S->getCond();
   }
-
-  Environment &Env;
-  int BlockSuccIdx;
 };
 
 /// Holds data structures required for running dataflow analysis.
@@ -337,26 +274,33 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
         AC.BlockStates[Pred->getBlockID()];
     if (!MaybePredState)
       continue;
-
-    if (AC.Analysis.builtinOptions()) {
-      if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
-        // We have a terminator: we need to mutate an environment to describe
-        // when the terminator is taken. Copy now.
+    const TypeErasedDataflowAnalysisState &PredState = *MaybePredState;
+
+    if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
+      bool BranchVal = blockIndexInPredecessor(*Pred, Block) == 0;
+      const Expr *Cond = TerminatorVisitor().Visit(PredTerminatorStmt);
+      if (Cond != nullptr) {
+        // `transferBranch` may need to mutate the environment to describe the
+        // dynamic effect of the terminator for a given branch.  Copy now.
         TypeErasedDataflowAnalysisState Copy = MaybePredState->fork();
-
-        auto [Cond, CondValue] =
-            TerminatorVisitor(Copy.Env, blockIndexInPredecessor(*Pred, Block))
-                .Visit(PredTerminatorStmt);
-        if (Cond != nullptr)
-          // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts
-          // are not set.
-          AC.Analysis.transferBranchTypeErased(CondValue, Cond, Copy.Lattice,
-                                               Copy.Env);
+        if (AC.Analysis.builtinOptions()) {
+          auto *CondVal = Copy.Env.get<BoolValue>(*Cond);
+          // In transferCFGBlock(), we ensure that we always have a `Value`
+          // for the terminator condition, so assert this. We consciously
+          // assert ourselves instead of asserting via `cast()` so that we get
+          // a more meaningful line number if the assertion fails.
+          assert(CondVal != nullptr);
+          BoolValue *AssertedVal =
+              BranchVal ? CondVal : &Copy.Env.makeNot(*CondVal);
+          Copy.Env.assume(AssertedVal->formula());
+        }
+        AC.Analysis.transferBranchTypeErased(BranchVal, Cond, Copy.Lattice,
+                                             Copy.Env);
         Builder.addOwned(std::move(Copy));
         continue;
       }
     }
-    Builder.addUnowned(*MaybePredState);
+    Builder.addUnowned(PredState);
   }
   return std::move(Builder).take();
 }

>From 5cb5ae21e9a74f9b22d64cd67ccd69b22caaf378 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Fri, 23 Feb 2024 20:15:36 +0000
Subject: [PATCH 2/3] [clang][dataflow] Factor out built-in boolean model into
 an explicit module.

In the process, drastically simplify the handling of terminators.
---
 .../clang/Analysis/FlowSensitive/Transfer.h   |  24 +++
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 144 ++++++++++++------
 2 files changed, 124 insertions(+), 44 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
index 7713df747cb76e..eea0cdd37ff63d 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
@@ -48,6 +48,30 @@ class StmtToEnvMap {
   const TypeErasedDataflowAnalysisState &CurState;
 };
 
+namespace bool_model {
+
+BoolValue &freshBoolValue(Environment &Env);
+
+BoolValue &rValueFromLValue(BoolValue &V, Environment &Env);
+
+BoolValue &logicalOrOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &logicalAndOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &eqOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &neOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &notOp(BoolValue &Sub, Environment &Env);
+
+// Models the transition along a branch edge in the CFG.
+// BranchVal -- the concrete, dynamic branch value -- true for `then` and false
+// for `else`.
+// CondVal -- the abstract value representing the condition.
+void transferBranch(bool BranchVal, BoolValue &CondVal, Environment &Env);
+
+} // namespace bool_model
+
 /// Evaluates `S` and updates `Env` accordingly.
 ///
 /// Requirements:
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 04aa2831df0558..1f3acba268d0a7 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -50,21 +50,6 @@ const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const {
   return &State->Env;
 }
 
-static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS,
-                                          Environment &Env) {
-  Value *LHSValue = Env.getValue(LHS);
-  Value *RHSValue = Env.getValue(RHS);
-
-  if (LHSValue == RHSValue)
-    return Env.getBoolLiteralValue(true);
-
-  if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue))
-    if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue))
-      return Env.makeIff(*LHSBool, *RHSBool);
-
-  return Env.makeAtomicBoolValue();
-}
-
 static BoolValue &unpackValue(BoolValue &V, Environment &Env) {
   if (auto *Top = llvm::dyn_cast<TopBoolValue>(&V)) {
     auto &A = Env.getDataflowAnalysisContext().arena();
@@ -73,6 +58,68 @@ static BoolValue &unpackValue(BoolValue &V, Environment &Env) {
   return V;
 }
 
+static void propagateValue(const Expr &From, const Expr &To, Environment &Env) {
+  if (auto *Val = Env.getValue(From))
+    Env.setValue(To, *Val);
+}
+
+static void propagateStorageLocation(const Expr &From, const Expr &To,
+                                     Environment &Env) {
+  if (auto *Loc = Env.getStorageLocation(From))
+    Env.setStorageLocation(To, *Loc);
+}
+
+// Propagates the value or storage location of `From` to `To` in cases where
+// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff
+// `From` is a glvalue.
+static void propagateValueOrStorageLocation(const Expr &From, const Expr &To,
+                                            Environment &Env) {
+  assert(From.isGLValue() == To.isGLValue());
+  if (From.isGLValue())
+    propagateStorageLocation(From, To, Env);
+  else
+    propagateValue(From, To, Env);
+}
+
+namespace bool_model {
+
+BoolValue &freshBoolValue(Environment &Env) {
+  return Env.makeAtomicBoolValue();
+}
+
+BoolValue &rValueFromLValue(BoolValue &V, Environment &Env) {
+  return unpackValue(V, Env);
+}
+
+BoolValue &logicalOrOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  return Env.makeOr(LHS, RHS);
+}
+
+BoolValue &logicalAndOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  return Env.makeAnd(LHS, RHS);
+}
+
+BoolValue &eqOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  return Env.makeIff(LHS, RHS);
+}
+
+BoolValue &neOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  return Env.makeNot(Env.makeIff(LHS, RHS));
+}
+
+BoolValue &notOp(BoolValue &Sub, Environment &Env) { return Env.makeNot(Sub); }
+
+void transferBranch(bool BranchVal, BoolValue &CondVal, Environment &Env) {
+  if (BranchVal)
+    Env.assume(CondVal.formula());
+  else
+    // The condition must be inverted for the successor that encompasses the
+    // "else" branch, if such exists.
+    Env.assume(Env.makeNot(CondVal).formula());
+}
+
+} // namespace bool_model
+
 // Unpacks the value (if any) associated with `E` and updates `E` to the new
 // value, if any unpacking occured. Also, does the lvalue-to-rvalue conversion,
 // by skipping past the reference.
@@ -86,34 +133,45 @@ static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) {
   if (B == nullptr)
     return Val;
 
-  auto &UnpackedVal = unpackValue(*B, Env);
+  auto &UnpackedVal = bool_model::rValueFromLValue(*B, Env);
   if (&UnpackedVal == Val)
     return Val;
   Env.setValue(*Loc, UnpackedVal);
   return &UnpackedVal;
 }
 
-static void propagateValue(const Expr &From, const Expr &To, Environment &Env) {
-  if (auto *Val = Env.getValue(From))
-    Env.setValue(To, *Val);
+static BoolValue &evaluateEquality(const Expr &LHS, const Expr &RHS,
+                                   Environment &Env) {
+  Value *LHSValue = Env.getValue(LHS);
+  Value *RHSValue = Env.getValue(RHS);
+
+  // Bug!
+  if (LHSValue == RHSValue)
+    return Env.getBoolLiteralValue(true);
+
+  if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue))
+    if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue))
+      return bool_model::eqOp(*LHSBool, *RHSBool, Env);
+
+  // TODO Why this eager construcoitn of an atomic?
+  return Env.makeAtomicBoolValue();
 }
 
-static void propagateStorageLocation(const Expr &From, const Expr &To,
+static BoolValue &evaluateInequality(const Expr &LHS, const Expr &RHS,
                                      Environment &Env) {
-  if (auto *Loc = Env.getStorageLocation(From))
-    Env.setStorageLocation(To, *Loc);
-}
+  Value *LHSValue = Env.getValue(LHS);
+  Value *RHSValue = Env.getValue(RHS);
 
-// Propagates the value or storage location of `From` to `To` in cases where
-// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff
-// `From` is a glvalue.
-static void propagateValueOrStorageLocation(const Expr &From, const Expr &To,
-                                            Environment &Env) {
-  assert(From.isGLValue() == To.isGLValue());
-  if (From.isGLValue())
-    propagateStorageLocation(From, To, Env);
-  else
-    propagateValue(From, To, Env);
+  // Bug!
+  if (LHSValue == RHSValue)
+    return Env.getBoolLiteralValue(false);
+
+  if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue))
+    if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue))
+      return bool_model::neOp(*LHSBool, *RHSBool, Env);
+
+  // TODO Why this eager construcoitn of an atomic?
+  return Env.makeAtomicBoolValue();
 }
 
 namespace {
@@ -153,22 +211,20 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS);
 
       if (S->getOpcode() == BO_LAnd)
-        Env.setValue(*S, Env.makeAnd(LHSVal, RHSVal));
+        Env.setValue(*S, bool_model::logicalAndOp(LHSVal, RHSVal, Env));
       else
-        Env.setValue(*S, Env.makeOr(LHSVal, RHSVal));
+        Env.setValue(*S, bool_model::logicalOrOp(LHSVal, RHSVal, Env));
       break;
     }
     case BO_NE:
-    case BO_EQ: {
-      auto &LHSEqRHSValue = evaluateBooleanEquality(*LHS, *RHS, Env);
-      Env.setValue(*S, S->getOpcode() == BO_EQ ? LHSEqRHSValue
-                                               : Env.makeNot(LHSEqRHSValue));
+      Env.setValue(*S, evaluateInequality(*LHS, *RHS, Env));
       break;
-    }
-    case BO_Comma: {
+    case BO_EQ:
+      Env.setValue(*S, evaluateEquality(*LHS, *RHS, Env));
+      break;
+    case BO_Comma:
       propagateValueOrStorageLocation(*RHS, *S, Env);
       break;
-    }
     default:
       break;
     }
@@ -273,7 +329,7 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       else
         // FIXME: If integer modeling is added, then update this code to create
         // the boolean based on the integer model.
-        Env.setValue(*S, Env.makeAtomicBoolValue());
+        Env.setValue(*S, bool_model::freshBoolValue(Env));
       break;
     }
 
@@ -362,7 +418,7 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       if (SubExprVal == nullptr)
         break;
 
-      Env.setValue(*S, Env.makeNot(*SubExprVal));
+      Env.setValue(*S, bool_model::notOp(*SubExprVal, Env));
       break;
     }
     default:

>From 16cdcfa9471a089af7e87b0dbea5941d3975c8b9 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Mon, 11 Mar 2024 16:57:14 +0000
Subject: [PATCH 3/3] [clang][dataflow] Add implementation of a simple boolean
 model

The new model still uses atomic variables in boolean formulas, but it limits the
environment to accumulating truth values for atomic variables, rather than
the arbitrary formula allowed by the flow condition.
---
 .../FlowSensitive/DataflowEnvironment.h       |   4 +
 .../clang/Analysis/FlowSensitive/Transfer.h   |  32 +++-
 .../FlowSensitive/DataflowEnvironment.cpp     |  23 +++
 .../Models/UncheckedOptionalAccessModel.cpp   |   3 +-
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 146 +++++++++++++++++-
 .../TypeErasedDataflowAnalysis.cpp            |   7 +-
 .../Analysis/FlowSensitive/TransferTest.cpp   |   8 +-
 .../TypeErasedDataflowAnalysisTest.cpp        |   4 +
 .../UncheckedOptionalAccessModelTest.cpp      |  17 +-
 9 files changed, 222 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 2330697299fdd7..15ce37c13a6c77 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -500,6 +500,9 @@ class Environment {
     return cast_or_null<T>(getValue(E));
   }
 
+  std::optional<bool> getAtomValue(Atom A) const;
+  void setAtomValue(Atom A, bool);
+
   // FIXME: should we deprecate the following & call arena().create() directly?
 
   /// Creates a `T` (some subclass of `Value`), forwarding `args` to the
@@ -720,6 +723,7 @@ class Environment {
   // deterministic sequence. This in turn produces deterministic SAT formulas.
   llvm::MapVector<const Expr *, Value *> ExprToVal;
   llvm::MapVector<const StorageLocation *, Value *> LocToVal;
+  llvm::MapVector<Atom, bool> AtomToVal;
 
   Atom FlowConditionToken;
 };
diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
index eea0cdd37ff63d..f6b3e714b58611 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
@@ -48,7 +48,7 @@ class StmtToEnvMap {
   const TypeErasedDataflowAnalysisState &CurState;
 };
 
-namespace bool_model {
+namespace sat_bool_model {
 
 BoolValue &freshBoolValue(Environment &Env);
 
@@ -70,7 +70,35 @@ BoolValue &notOp(BoolValue &Sub, Environment &Env);
 // CondVal -- the abstract value representing the condition.
 void transferBranch(bool BranchVal, BoolValue &CondVal, Environment &Env);
 
-} // namespace bool_model
+} // namespace sat_bool_model
+
+namespace simple_bool_model {
+
+std::optional<bool> getLiteralValue(const Formula &F, const Environment &Env);
+
+BoolValue &freshBoolValue(Environment &Env);
+
+BoolValue &rValueFromLValue(BoolValue &V, Environment &Env);
+
+BoolValue &logicalOrOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &logicalAndOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &eqOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &neOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &notOp(BoolValue &Sub, Environment &Env);
+
+// Models the transition along a branch edge in the CFG.
+// BranchVal -- the concrete, dynamic branch value -- true for `then` and false
+// for `else`.
+// CondVal -- the abstract value representing the condition.
+void transferBranch(bool BranchVal, BoolValue &CondVal, Environment &Env);
+
+} // namespace simple_bool_model
+
+namespace bool_model = simple_bool_model;
 
 /// Evaluates `S` and updates `Env` accordingly.
 ///
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 1d2bd9a9b08af3..f0c500e2c5753d 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -17,6 +17,7 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/Type.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Analysis/FlowSensitive/Transfer.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
@@ -706,6 +707,12 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
     JoinedEnv.ExprToLoc = joinExprMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
   }
 
+  for (auto &Entry : EnvA.AtomToVal) {
+    auto It = EnvB.AtomToVal.find(Entry.first);
+    if (It != EnvB.AtomToVal.end() && Entry.second == It->second)
+      JoinedEnv.AtomToVal.insert({Entry.first, Entry.second});
+  }
+
   return JoinedEnv;
 }
 
@@ -1059,9 +1066,16 @@ void Environment::assume(const Formula &F) {
   DACtx->addFlowConditionConstraint(FlowConditionToken, F);
 }
 
+#if 0
 bool Environment::proves(const Formula &F) const {
   return DACtx->flowConditionImplies(FlowConditionToken, F);
 }
+#else
+bool Environment::proves(const Formula &F) const {
+  auto V = simple_bool_model::getLiteralValue(F, *this);
+  return V.has_value() && *V;
+}
+#endif
 
 bool Environment::allows(const Formula &F) const {
   return DACtx->flowConditionAllows(FlowConditionToken, F);
@@ -1119,6 +1133,15 @@ void Environment::dump() const {
   dump(llvm::dbgs());
 }
 
+std::optional<bool> Environment::getAtomValue(Atom A) const {
+  auto It = AtomToVal.find(A);
+  if (It == AtomToVal.end())
+    return std::nullopt;
+  return It->second;
+}
+
+void Environment::setAtomValue(Atom A, bool b) { AtomToVal[A] = b; }
+
 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
                                                  const Environment &Env) {
   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
index 1d31b22b6d25ff..5949af2905106d 100644
--- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
@@ -569,8 +569,7 @@ const Formula &evaluateEquality(Arena &A, const Formula &EqVal,
   //    If neither is set, then they are equal.
   // We rewrite b) as !EqVal => (LHS v RHS), for a more compact formula.
   return A.makeAnd(
-      A.makeImplies(EqVal, A.makeOr(A.makeAnd(LHS, RHS),
-                                    A.makeAnd(A.makeNot(LHS), A.makeNot(RHS)))),
+      A.makeImplies(EqVal, A.makeEquals(LHS, RHS)),
       A.makeImplies(A.makeNot(EqVal), A.makeOr(LHS, RHS)));
 }
 
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 1f3acba268d0a7..5bf1cc19349123 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -81,7 +81,7 @@ static void propagateValueOrStorageLocation(const Expr &From, const Expr &To,
     propagateValue(From, To, Env);
 }
 
-namespace bool_model {
+namespace sat_bool_model {
 
 BoolValue &freshBoolValue(Environment &Env) {
   return Env.makeAtomicBoolValue();
@@ -110,15 +110,145 @@ BoolValue &neOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
 BoolValue &notOp(BoolValue &Sub, Environment &Env) { return Env.makeNot(Sub); }
 
 void transferBranch(bool BranchVal, BoolValue &CondVal, Environment &Env) {
-  if (BranchVal)
-    Env.assume(CondVal.formula());
-  else
-    // The condition must be inverted for the successor that encompasses the
-    // "else" branch, if such exists.
-    Env.assume(Env.makeNot(CondVal).formula());
+  // The condition must be inverted for the successor that encompasses the
+  // "else" branch, if such exists.
+  BoolValue &AssertedVal = BranchVal ? CondVal : Env.makeNot(CondVal);
+  Env.assume(AssertedVal.formula());
+}
+
+} // namespace sat_bool_model
+
+namespace simple_bool_model {
+
+std::optional<bool> getLiteralValue(const Formula &F, const Environment &Env) {
+  switch (F.kind()) {
+  case Formula::AtomRef:
+    return Env.getAtomValue(F.getAtom());
+  case Formula::Literal:
+    return F.literal();
+  case Formula::Not: {
+    ArrayRef<const Formula *> Operands = F.operands();
+    assert(Operands.size() == 1);
+    if (std::optional<bool> Maybe = getLiteralValue(*Operands[0], Env))
+      return !*Maybe;
+    return std::nullopt;
+  }
+  default:
+    return std::nullopt;
+  }
+}
+
+BoolValue &freshBoolValue(Environment &Env) {
+  return Env.makeAtomicBoolValue();
+}
+
+BoolValue &rValueFromLValue(BoolValue &V, Environment &Env) {
+  return unpackValue(V, Env);
+}
+
+BoolValue &logicalOrOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  // FIXME: check if these optimizations are already built in to the formula
+  // generation.
+  if (auto V = getLiteralValue(LHS.formula(), Env))
+    return *V ? Env.getBoolLiteralValue(true) : RHS;
+  if (auto V = getLiteralValue(RHS.formula(), Env))
+    return *V ? Env.getBoolLiteralValue(true) : LHS;
+  return Env.makeOr(LHS, RHS);
+}
+
+BoolValue &logicalAndOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  if (auto V = getLiteralValue(LHS.formula(), Env))
+    return *V ? RHS : Env.getBoolLiteralValue(false);
+  if (auto V = getLiteralValue(RHS.formula(), Env))
+    return *V ? LHS : Env.getBoolLiteralValue(false);
+  return Env.makeAnd(LHS, RHS);
+}
+
+BoolValue &eqOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  if (&LHS.formula() == &RHS.formula())
+    return Env.getBoolLiteralValue(true);
+  if (auto V = getLiteralValue(LHS.formula(), Env))
+    return *V ? RHS : Env.makeNot(RHS);
+  if (auto V = getLiteralValue(RHS.formula(), Env))
+    return *V ? LHS : Env.makeNot(LHS);
+  return Env.makeIff(LHS, RHS);
+}
+
+// FIXME: i think this could be implemented in terms of eqOp.
+BoolValue &neOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  if (&LHS.formula() == &RHS.formula())
+    return Env.getBoolLiteralValue(false);
+  if (auto V = getLiteralValue(LHS.formula(), Env))
+    return *V ? Env.makeNot(RHS) : RHS;
+  if (auto V = getLiteralValue(RHS.formula(), Env))
+    return *V ? Env.makeNot(LHS) : LHS;
+  return Env.makeNot(Env.makeIff(LHS, RHS));
+}
+
+BoolValue &notOp(BoolValue &Sub, Environment &Env) { return Env.makeNot(Sub); }
+
+// Updates atom settings in `Env` based on the formula. `AtomVal` indicates the
+// value to use for atoms encountered in the formula.
+static void assumeFormula(bool AtomVal, const Formula &F, Environment &Env) {
+  // TODO: handle literals directly rather than calling getLiteralValue.
+  std::optional<bool> Lit = getLiteralValue(F, Env);
+  if (Lit.has_value())
+    // FIXME: Nothing to do. Could verify no contradiction, but not sure what
+    // we'd do with that here. Need to poison the Env.
+    return;
+
+  switch (F.kind()) {
+  case Formula::AtomRef:
+    // FIXME: check for contradictions
+    Env.setAtomValue(F.getAtom(), AtomVal);
+    break;
+  case Formula::Not: {
+    ArrayRef<const Formula *> Operands = F.operands();
+    assert(Operands.size() == 1);
+    assumeFormula(!AtomVal, *Operands[0], Env);
+    break;
+  }
+  case Formula::And: {
+    if (AtomVal == true) {
+      ArrayRef<const Formula *> Operands = F.operands();
+      assert(Operands.size() == 2);
+      assumeFormula(true, *Operands[0], Env);
+      assumeFormula(true, *Operands[1], Env);
+    }
+    break;
+  }
+  case Formula::Or: {
+    if (AtomVal == false) {
+      // Interpret the negated "or" as "and" of negated operands.
+      ArrayRef<const Formula *> Operands = F.operands();
+      assert(Operands.size() == 2);
+      assumeFormula(false, *Operands[0], Env);
+      assumeFormula(false, *Operands[1], Env);
+    }
+    break;
+  }
+  case Formula::Equal: {
+    ArrayRef<const Formula *> Operands = F.operands();
+    assert(Operands.size() == 2);
+    auto &LHS = *Operands[0];
+    auto &RHS = *Operands[1];
+    if (auto V = getLiteralValue(LHS, Env)) {
+      assumeFormula(AtomVal == *V, RHS, Env);
+    } else if (auto V = getLiteralValue(RHS, Env)) {
+      assumeFormula(AtomVal == *V, LHS, Env);
+    }
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+void transferBranch(bool BranchVal, BoolValue &CondVal, Environment &Env) {
+  assumeFormula(BranchVal, CondVal.formula(), Env);
 }
 
-} // namespace bool_model
+} // namespace simple_bool_model
 
 // Unpacks the value (if any) associated with `E` and updates `E` to the new
 // value, if any unpacking occured. Also, does the lvalue-to-rvalue conversion,
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 2d745231fd0758..226c0eaaa89995 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -290,9 +290,7 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
           // assert ourselves instead of asserting via `cast()` so that we get
           // a more meaningful line number if the assertion fails.
           assert(CondVal != nullptr);
-          BoolValue *AssertedVal =
-              BranchVal ? CondVal : &Copy.Env.makeNot(*CondVal);
-          Copy.Env.assume(AssertedVal->formula());
+          bool_model::transferBranch(BranchVal, *CondVal, Copy.Env);
         }
         AC.Analysis.transferBranchTypeErased(BranchVal, Cond, Copy.Lattice,
                                              Copy.Env);
@@ -463,7 +461,8 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
     // we have *some* value for the condition expression. This ensures that
     // when we extend the flow condition, it actually changes.
     if (State.Env.getValue(*TerminatorCond) == nullptr)
-      State.Env.setValue(*TerminatorCond, State.Env.makeAtomicBoolValue());
+      State.Env.setValue(*TerminatorCond,
+                         bool_model::freshBoolValue(State.Env));
     AC.Log.recordState(State);
   }
 
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index a8c282f140b4cd..25fb99bc8641cb 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Analysis/FlowSensitive/Transfer.h"
 #include "TestingSupport.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
@@ -35,6 +36,7 @@ using ::testing::Eq;
 using ::testing::IsNull;
 using ::testing::Ne;
 using ::testing::NotNull;
+using ::testing::Optional;
 using ::testing::UnorderedElementsAre;
 
 void runDataflow(
@@ -4331,12 +4333,12 @@ TEST(TransferTest, LoopWithAssignmentConverges) {
     bool foo();
 
     void target() {
-       do {
+      do {
         bool Bar = foo();
         if (Bar) break;
         (void)Bar;
         /*[[p]]*/
-      } while (true);
+     } while (true);
     }
   )";
   // The key property that we are verifying is implicit in `runDataflow` --
@@ -4353,6 +4355,8 @@ TEST(TransferTest, LoopWithAssignmentConverges) {
         ASSERT_THAT(BarDecl, NotNull());
 
         auto &BarVal = getFormula(*BarDecl, Env);
+        ASSERT_EQ(BarVal.kind(), Formula::AtomRef);
+        ASSERT_THAT(Env.getAtomValue(BarVal.getAtom()), Optional(false));
         EXPECT_TRUE(Env.proves(Env.arena().makeNot(BarVal)));
       });
 }
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index 9d05a0d6ca4010..c84df1da0e524b 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -286,6 +286,10 @@ TEST_F(DiscardExprStateTest, BooleanOperator) {
   const auto &AndOpState = blockStateForStmt(BlockStates, AndOp);
   EXPECT_EQ(AndOpState.Env.getValue(*AndOp.getLHS()), LHSValue);
   EXPECT_EQ(AndOpState.Env.getValue(*AndOp.getRHS()), RHSValue);
+  // FIXME: this test is too strict. We want to check equivalence not equality;
+  // as is, its a change detector test. Notice that we only evaluate `b1 && b2`
+  // in a context where we know that `b1 is true, so there's a potential
+  // optimization to store only `RHSValue` as the operation's value.
   EXPECT_EQ(AndOpState.Env.getValue(AndOp),
             &AndOpState.Env.makeAnd(*LHSValue, *RHSValue));
 
diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp
index b6e4973fd7cb2b..f6e8f265859aeb 100644
--- a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp
@@ -1382,9 +1382,10 @@ class UncheckedOptionalAccessTest
 
 INSTANTIATE_TEST_SUITE_P(
     UncheckedOptionalUseTestInst, UncheckedOptionalAccessTest,
-    ::testing::Values(OptionalTypeIdentifier{"std", "optional"},
-                      OptionalTypeIdentifier{"absl", "optional"},
-                      OptionalTypeIdentifier{"base", "Optional"}),
+    ::testing::Values(OptionalTypeIdentifier{"std", "optional"}// ,
+                      // OptionalTypeIdentifier{"absl", "optional"},
+                      // OptionalTypeIdentifier{"base", "Optional"}
+                      ),
     [](const ::testing::TestParamInfo<OptionalTypeIdentifier> &Info) {
       return Info.param.NamespaceName;
     });
@@ -2954,7 +2955,7 @@ TEST_P(UncheckedOptionalAccessTest, CorrelatedBranches) {
   )");
 }
 
-TEST_P(UncheckedOptionalAccessTest, JoinDistinctValues) {
+TEST_P(UncheckedOptionalAccessTest, JoinDistinctValuesThenCheck) {
   ExpectDiagnosticsFor(
       R"code(
     #include "unchecked_optional_access_test.h"
@@ -2973,7 +2974,9 @@ TEST_P(UncheckedOptionalAccessTest, JoinDistinctValues) {
       }
     }
   )code");
+}
 
+TEST_P(UncheckedOptionalAccessTest, JoinDistinctValuesCheckInBranches) {
   ExpectDiagnosticsFor(R"code(
     #include "unchecked_optional_access_test.h"
 
@@ -2989,7 +2992,9 @@ TEST_P(UncheckedOptionalAccessTest, JoinDistinctValues) {
       opt.value();
     }
   )code");
+}
 
+TEST_P(UncheckedOptionalAccessTest, JoinDistinctValuesCheckInOneBranch) {
   ExpectDiagnosticsFor(
       R"code(
     #include "unchecked_optional_access_test.h"
@@ -3005,7 +3010,9 @@ TEST_P(UncheckedOptionalAccessTest, JoinDistinctValues) {
       opt.value(); // [[unsafe]]
     }
   )code");
+}
 
+TEST_P(UncheckedOptionalAccessTest, JoinDistinctValuesSetInBothBranches) {
   ExpectDiagnosticsFor(
       R"code(
     #include "unchecked_optional_access_test.h"
@@ -3020,7 +3027,9 @@ TEST_P(UncheckedOptionalAccessTest, JoinDistinctValues) {
       opt.value();
     }
   )code");
+}
 
+TEST_P(UncheckedOptionalAccessTest, JoinDistinctValuesSetInOneBranch) {
   ExpectDiagnosticsFor(
       R"code(
     #include "unchecked_optional_access_test.h"



More information about the cfe-commits mailing list