[clang] [clang][dataflow] Factor out built-in boolean model into an explicit module. (PR #82950)

Yitzhak Mandelbaum via cfe-commits cfe-commits at lists.llvm.org
Sun Feb 25 18:42:33 PST 2024


https://github.com/ymand created https://github.com/llvm/llvm-project/pull/82950

Draft to demo how we can pull out the boolean model. Let's discuss specifics of
namings, location, etc.

The purpose of this refactoring is to enable us to compare the performance of
different boolean models. In particular, we're interested in investigating a
very simple semantic domain of just the booleans (and Top).

In the process, the PR drastically simplifies the handling of terminators. This
cleanup can be pulled out into its own PR, to precede the refactoring work.


>From e0c51969f70079e18b7cacb99c3b9c1a7470b39a Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Fri, 23 Feb 2024 20:15:36 +0000
Subject: [PATCH] [clang][dataflow] Factor out built-in boolean model into an
 explicit module.

In the process, drastically simplify the handling of terminators.
---
 .../clang/Analysis/FlowSensitive/Transfer.h   |  24 +++
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 144 ++++++++++++------
 .../TypeErasedDataflowAnalysis.cpp            | 126 +++++----------
 3 files changed, 161 insertions(+), 133 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
index 7713df747cb76e..eea0cdd37ff63d 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
@@ -48,6 +48,30 @@ class StmtToEnvMap {
   const TypeErasedDataflowAnalysisState &CurState;
 };
 
+namespace bool_model {
+
+BoolValue &freshBoolValue(Environment &Env);
+
+BoolValue &rValueFromLValue(BoolValue &V, Environment &Env);
+
+BoolValue &logicalOrOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &logicalAndOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &eqOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &neOp(BoolValue &LHS, BoolValue &RHS, Environment &Env);
+
+BoolValue &notOp(BoolValue &Sub, Environment &Env);
+
+// Models the transition along a branch edge in the CFG.
+// BranchVal -- the concrete, dynamic branch value -- true for `then` and false
+// for `else`.
+// CondVal -- the abstract value representing the condition.
+void transferBranch(bool BranchVal, BoolValue &CondVal, Environment &Env);
+
+} // namespace bool_model
+
 /// Evaluates `S` and updates `Env` accordingly.
 ///
 /// Requirements:
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index a098471d0ee905..65072d85d10c69 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -50,21 +50,6 @@ const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const {
   return &State->Env;
 }
 
-static BoolValue &evaluateBooleanEquality(const Expr &LHS, const Expr &RHS,
-                                          Environment &Env) {
-  Value *LHSValue = Env.getValue(LHS);
-  Value *RHSValue = Env.getValue(RHS);
-
-  if (LHSValue == RHSValue)
-    return Env.getBoolLiteralValue(true);
-
-  if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue))
-    if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue))
-      return Env.makeIff(*LHSBool, *RHSBool);
-
-  return Env.makeAtomicBoolValue();
-}
-
 static BoolValue &unpackValue(BoolValue &V, Environment &Env) {
   if (auto *Top = llvm::dyn_cast<TopBoolValue>(&V)) {
     auto &A = Env.getDataflowAnalysisContext().arena();
@@ -73,6 +58,68 @@ static BoolValue &unpackValue(BoolValue &V, Environment &Env) {
   return V;
 }
 
+static void propagateValue(const Expr &From, const Expr &To, Environment &Env) {
+  if (auto *Val = Env.getValue(From))
+    Env.setValue(To, *Val);
+}
+
+static void propagateStorageLocation(const Expr &From, const Expr &To,
+                                     Environment &Env) {
+  if (auto *Loc = Env.getStorageLocation(From))
+    Env.setStorageLocation(To, *Loc);
+}
+
+// Propagates the value or storage location of `From` to `To` in cases where
+// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff
+// `From` is a glvalue.
+static void propagateValueOrStorageLocation(const Expr &From, const Expr &To,
+                                            Environment &Env) {
+  assert(From.isGLValue() == To.isGLValue());
+  if (From.isGLValue())
+    propagateStorageLocation(From, To, Env);
+  else
+    propagateValue(From, To, Env);
+}
+
+namespace bool_model {
+
+BoolValue &freshBoolValue(Environment &Env) {
+  return Env.makeAtomicBoolValue();
+}
+
+BoolValue &rValueFromLValue(BoolValue &V, Environment &Env) {
+  return unpackValue(V, Env);
+}
+
+BoolValue &logicalOrOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  return Env.makeOr(LHS, RHS);
+}
+
+BoolValue &logicalAndOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  return Env.makeAnd(LHS, RHS);
+}
+
+BoolValue &eqOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  return Env.makeIff(LHS, RHS);
+}
+
+BoolValue &neOp(BoolValue &LHS, BoolValue &RHS, Environment &Env) {
+  return Env.makeNot(Env.makeIff(LHS, RHS));
+}
+
+BoolValue &notOp(BoolValue &Sub, Environment &Env) { return Env.makeNot(Sub); }
+
+void transferBranch(bool BranchVal, BoolValue &CondVal, Environment &Env) {
+  if (BranchVal)
+    Env.assume(CondVal.formula());
+  else
+    // The condition must be inverted for the successor that encompasses the
+    // "else" branch, if such exists.
+    Env.assume(Env.makeNot(CondVal).formula());
+}
+
+} // namespace bool_model
+
 // Unpacks the value (if any) associated with `E` and updates `E` to the new
 // value, if any unpacking occured. Also, does the lvalue-to-rvalue conversion,
 // by skipping past the reference.
@@ -86,34 +133,45 @@ static Value *maybeUnpackLValueExpr(const Expr &E, Environment &Env) {
   if (B == nullptr)
     return Val;
 
-  auto &UnpackedVal = unpackValue(*B, Env);
+  auto &UnpackedVal = bool_model::rValueFromLValue(*B, Env);
   if (&UnpackedVal == Val)
     return Val;
   Env.setValue(*Loc, UnpackedVal);
   return &UnpackedVal;
 }
 
-static void propagateValue(const Expr &From, const Expr &To, Environment &Env) {
-  if (auto *Val = Env.getValue(From))
-    Env.setValue(To, *Val);
+static BoolValue &evaluateEquality(const Expr &LHS, const Expr &RHS,
+                                   Environment &Env) {
+  Value *LHSValue = Env.getValue(LHS);
+  Value *RHSValue = Env.getValue(RHS);
+
+  // Bug!
+  if (LHSValue == RHSValue)
+    return Env.getBoolLiteralValue(true);
+
+  if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue))
+    if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue))
+      return bool_model::eqOp(*LHSBool, *RHSBool, Env);
+
+  // TODO Why this eager construcoitn of an atomic?
+  return Env.makeAtomicBoolValue();
 }
 
-static void propagateStorageLocation(const Expr &From, const Expr &To,
+static BoolValue &evaluateInequality(const Expr &LHS, const Expr &RHS,
                                      Environment &Env) {
-  if (auto *Loc = Env.getStorageLocation(From))
-    Env.setStorageLocation(To, *Loc);
-}
+  Value *LHSValue = Env.getValue(LHS);
+  Value *RHSValue = Env.getValue(RHS);
 
-// Propagates the value or storage location of `From` to `To` in cases where
-// `From` may be either a glvalue or a prvalue. `To` must be a glvalue iff
-// `From` is a glvalue.
-static void propagateValueOrStorageLocation(const Expr &From, const Expr &To,
-                                            Environment &Env) {
-  assert(From.isGLValue() == To.isGLValue());
-  if (From.isGLValue())
-    propagateStorageLocation(From, To, Env);
-  else
-    propagateValue(From, To, Env);
+  // Bug!
+  if (LHSValue == RHSValue)
+    return Env.getBoolLiteralValue(false);
+
+  if (auto *LHSBool = dyn_cast_or_null<BoolValue>(LHSValue))
+    if (auto *RHSBool = dyn_cast_or_null<BoolValue>(RHSValue))
+      return bool_model::neOp(*LHSBool, *RHSBool, Env);
+
+  // TODO Why this eager construcoitn of an atomic?
+  return Env.makeAtomicBoolValue();
 }
 
 namespace {
@@ -153,22 +211,20 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS);
 
       if (S->getOpcode() == BO_LAnd)
-        Env.setValue(*S, Env.makeAnd(LHSVal, RHSVal));
+        Env.setValue(*S, bool_model::logicalAndOp(LHSVal, RHSVal, Env));
       else
-        Env.setValue(*S, Env.makeOr(LHSVal, RHSVal));
+        Env.setValue(*S, bool_model::logicalOrOp(LHSVal, RHSVal, Env));
       break;
     }
     case BO_NE:
-    case BO_EQ: {
-      auto &LHSEqRHSValue = evaluateBooleanEquality(*LHS, *RHS, Env);
-      Env.setValue(*S, S->getOpcode() == BO_EQ ? LHSEqRHSValue
-                                               : Env.makeNot(LHSEqRHSValue));
+      Env.setValue(*S, evaluateInequality(*LHS, *RHS, Env));
       break;
-    }
-    case BO_Comma: {
+    case BO_EQ:
+      Env.setValue(*S, evaluateEquality(*LHS, *RHS, Env));
+      break;
+    case BO_Comma:
       propagateValueOrStorageLocation(*RHS, *S, Env);
       break;
-    }
     default:
       break;
     }
@@ -273,7 +329,7 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       else
         // FIXME: If integer modeling is added, then update this code to create
         // the boolean based on the integer model.
-        Env.setValue(*S, Env.makeAtomicBoolValue());
+        Env.setValue(*S, bool_model::freshBoolValue(Env));
       break;
     }
 
@@ -362,7 +418,7 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       if (SubExprVal == nullptr)
         break;
 
-      Env.setValue(*S, Env.makeNot(*SubExprVal));
+      Env.setValue(*S, bool_model::notOp(*SubExprVal, Env));
       break;
     }
     default:
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 4c88c46142d64d..4f756dc9c16a5c 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <algorithm>
 #include <optional>
 #include <system_error>
 #include <utility>
@@ -33,7 +32,6 @@
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Error.h"
 
@@ -64,88 +62,27 @@ static bool isBackedgeNode(const CFGBlock &B) {
 
 namespace {
 
-// The return type of the visit functions in TerminatorVisitor. The first
-// element represents the terminator expression (that is the conditional
-// expression in case of a path split in the CFG). The second element
-// represents whether the condition was true or false.
-using TerminatorVisitorRetTy = std::pair<const Expr *, bool>;
-
-/// Extends the flow condition of an environment based on a terminator
-/// statement.
+/// Extracts the condition expression.
 class TerminatorVisitor
-    : public ConstStmtVisitor<TerminatorVisitor, TerminatorVisitorRetTy> {
+    : public ConstStmtVisitor<TerminatorVisitor, const Expr *> {
 public:
-  TerminatorVisitor(Environment &Env, int BlockSuccIdx)
-      : Env(Env), BlockSuccIdx(BlockSuccIdx) {}
-
-  TerminatorVisitorRetTy VisitIfStmt(const IfStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitWhileStmt(const WhileStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitDoStmt(const DoStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitForStmt(const ForStmt *S) {
-    auto *Cond = S->getCond();
-    if (Cond != nullptr)
-      return extendFlowCondition(*Cond);
-    return {nullptr, false};
-  }
-
-  TerminatorVisitorRetTy VisitCXXForRangeStmt(const CXXForRangeStmt *) {
+  TerminatorVisitor() = default;
+  const Expr *VisitIfStmt(const IfStmt *S) { return S->getCond(); }
+  const Expr *VisitWhileStmt(const WhileStmt *S) { return S->getCond(); }
+  const Expr *VisitDoStmt(const DoStmt *S) { return S->getCond(); }
+  const Expr *VisitForStmt(const ForStmt *S) { return S->getCond(); }
+  const Expr *VisitCXXForRangeStmt(const CXXForRangeStmt *) {
     // Don't do anything special for CXXForRangeStmt, because the condition
     // (being implicitly generated) isn't visible from the loop body.
-    return {nullptr, false};
+    return nullptr;
   }
-
-  TerminatorVisitorRetTy VisitBinaryOperator(const BinaryOperator *S) {
+  const Expr *VisitBinaryOperator(const BinaryOperator *S) {
     assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
-    auto *LHS = S->getLHS();
-    assert(LHS != nullptr);
-    return extendFlowCondition(*LHS);
+    return S->getLHS();
   }
-
-  TerminatorVisitorRetTy
-  VisitConditionalOperator(const ConditionalOperator *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-private:
-  TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) {
-    auto *Val = Env.get<BoolValue>(Cond);
-    // In transferCFGBlock(), we ensure that we always have a `Value` for the
-    // terminator condition, so assert this.
-    // We consciously assert ourselves instead of asserting via `cast()` so
-    // that we get a more meaningful line number if the assertion fails.
-    assert(Val != nullptr);
-
-    bool ConditionValue = true;
-    // The condition must be inverted for the successor that encompasses the
-    // "else" branch, if such exists.
-    if (BlockSuccIdx == 1) {
-      Val = &Env.makeNot(*Val);
-      ConditionValue = false;
-    }
-
-    Env.assume(Val->formula());
-    return {&Cond, ConditionValue};
+  const Expr *VisitConditionalOperator(const ConditionalOperator *S) {
+    return S->getCond();
   }
-
-  Environment &Env;
-  int BlockSuccIdx;
 };
 
 /// Holds data structures required for running dataflow analysis.
@@ -319,26 +256,36 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
         AC.BlockStates[Pred->getBlockID()];
     if (!MaybePredState)
       continue;
+    const TypeErasedDataflowAnalysisState &PredState = *MaybePredState;
 
     if (AC.Analysis.builtinOptions()) {
       if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
-        // We have a terminator: we need to mutate an environment to describe
-        // when the terminator is taken. Copy now.
-        TypeErasedDataflowAnalysisState Copy = MaybePredState->fork();
-
-        auto [Cond, CondValue] =
-            TerminatorVisitor(Copy.Env, blockIndexInPredecessor(*Pred, Block))
-                .Visit(PredTerminatorStmt);
-        if (Cond != nullptr)
+        bool BranchVal = blockIndexInPredecessor(*Pred, Block) == 0;
+        const Expr *Cond = TerminatorVisitor().Visit(PredTerminatorStmt);
+        if (Cond != nullptr) {
+          // `transferBranch` may need to mutate the environment to describe the
+          // dynamic effect of the terminator for a given branch.  Copy now.
+          TypeErasedDataflowAnalysisState Copy = MaybePredState->fork();
+
+          auto *Val = Copy.Env.get<BoolValue>(*Cond);
+          // In transferCFGBlock(), we ensure that we always have a `Value` for
+          // the terminator condition, so assert this.
+          // We consciously assert ourselves instead of asserting via `cast()`
+          // so that we get a more meaningful line number if the assertion
+          // fails.
+          assert(Val != nullptr);
+          bool_model::transferBranch(BranchVal, *Val, Copy.Env);
+
           // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts
           // are not set.
-          AC.Analysis.transferBranchTypeErased(CondValue, Cond, Copy.Lattice,
+          AC.Analysis.transferBranchTypeErased(BranchVal, Cond, Copy.Lattice,
                                                Copy.Env);
-        Builder.addOwned(std::move(Copy));
-        continue;
+          Builder.addOwned(std::move(Copy));
+          continue;
+        }
       }
     }
-    Builder.addUnowned(*MaybePredState);
+    Builder.addUnowned(PredState);
   }
   return std::move(Builder).take();
 }
@@ -501,7 +448,8 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
     // we have *some* value for the condition expression. This ensures that
     // when we extend the flow condition, it actually changes.
     if (State.Env.getValue(*TerminatorCond) == nullptr)
-      State.Env.setValue(*TerminatorCond, State.Env.makeAtomicBoolValue());
+      State.Env.setValue(*TerminatorCond,
+                         bool_model::freshBoolValue(State.Env));
     AC.Log.recordState(State);
   }
 



More information about the cfe-commits mailing list