[clang] [clang][dataflow] Refactor processing of terminator element (PR #84499)

via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 8 07:22:59 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-analysis

Author: Yitzhak Mandelbaum (ymand)

<details>
<summary>Changes</summary>

This patch vastly simplifies the code handling terminators, without changing any
behavior. Additionally, the simplification unblocks our ability to address a
(simple) FIXME in the code to invoke `transferBranch`, even when builtin options
are disabled.


---
Full diff: https://github.com/llvm/llvm-project/pull/84499.diff


1 Files Affected:

- (modified) clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp (+46-119) 


``````````diff
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 939247c047c66e..3acd6be874a6b1 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <algorithm>
 #include <optional>
 #include <system_error>
 #include <utility>
@@ -33,7 +32,6 @@
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Error.h"
 
@@ -64,88 +62,27 @@ static bool isBackedgeNode(const CFGBlock &B) {
 
 namespace {
 
-// The return type of the visit functions in TerminatorVisitor. The first
-// element represents the terminator expression (that is the conditional
-// expression in case of a path split in the CFG). The second element
-// represents whether the condition was true or false.
-using TerminatorVisitorRetTy = std::pair<const Expr *, bool>;
-
-/// Extends the flow condition of an environment based on a terminator
-/// statement.
+/// Extracts the condition expression.
 class TerminatorVisitor
-    : public ConstStmtVisitor<TerminatorVisitor, TerminatorVisitorRetTy> {
+    : public ConstStmtVisitor<TerminatorVisitor, const Expr *> {
 public:
-  TerminatorVisitor(Environment &Env, int BlockSuccIdx)
-      : Env(Env), BlockSuccIdx(BlockSuccIdx) {}
-
-  TerminatorVisitorRetTy VisitIfStmt(const IfStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitWhileStmt(const WhileStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitDoStmt(const DoStmt *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
-  }
-
-  TerminatorVisitorRetTy VisitForStmt(const ForStmt *S) {
-    auto *Cond = S->getCond();
-    if (Cond != nullptr)
-      return extendFlowCondition(*Cond);
-    return {nullptr, false};
-  }
-
-  TerminatorVisitorRetTy VisitCXXForRangeStmt(const CXXForRangeStmt *) {
+  TerminatorVisitor() = default;
+  const Expr *VisitIfStmt(const IfStmt *S) { return S->getCond(); }
+  const Expr *VisitWhileStmt(const WhileStmt *S) { return S->getCond(); }
+  const Expr *VisitDoStmt(const DoStmt *S) { return S->getCond(); }
+  const Expr *VisitForStmt(const ForStmt *S) { return S->getCond(); }
+  const Expr *VisitCXXForRangeStmt(const CXXForRangeStmt *) {
     // Don't do anything special for CXXForRangeStmt, because the condition
     // (being implicitly generated) isn't visible from the loop body.
-    return {nullptr, false};
+    return nullptr;
   }
-
-  TerminatorVisitorRetTy VisitBinaryOperator(const BinaryOperator *S) {
+  const Expr *VisitBinaryOperator(const BinaryOperator *S) {
     assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
-    auto *LHS = S->getLHS();
-    assert(LHS != nullptr);
-    return extendFlowCondition(*LHS);
+    return S->getLHS();
   }
-
-  TerminatorVisitorRetTy
-  VisitConditionalOperator(const ConditionalOperator *S) {
-    auto *Cond = S->getCond();
-    assert(Cond != nullptr);
-    return extendFlowCondition(*Cond);
+  const Expr *VisitConditionalOperator(const ConditionalOperator *S) {
+    return S->getCond();
   }
-
-private:
-  TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) {
-    auto *Val = Env.get<BoolValue>(Cond);
-    // In transferCFGBlock(), we ensure that we always have a `Value` for the
-    // terminator condition, so assert this.
-    // We consciously assert ourselves instead of asserting via `cast()` so
-    // that we get a more meaningful line number if the assertion fails.
-    assert(Val != nullptr);
-
-    bool ConditionValue = true;
-    // The condition must be inverted for the successor that encompasses the
-    // "else" branch, if such exists.
-    if (BlockSuccIdx == 1) {
-      Val = &Env.makeNot(*Val);
-      ConditionValue = false;
-    }
-
-    Env.assume(Val->formula());
-    return {&Cond, ConditionValue};
-  }
-
-  Environment &Env;
-  int BlockSuccIdx;
 };
 
 /// Holds data structures required for running dataflow analysis.
@@ -221,7 +158,6 @@ class PrettyStackTraceCFGElement : public llvm::PrettyStackTraceEntry {
 // Avoids unneccesary copies of the environment.
 class JoinedStateBuilder {
   AnalysisContext &AC;
-  Environment::ExprJoinBehavior JoinBehavior;
   std::vector<const TypeErasedDataflowAnalysisState *> All;
   std::deque<TypeErasedDataflowAnalysisState> Owned;
 
@@ -229,13 +165,11 @@ class JoinedStateBuilder {
   join(const TypeErasedDataflowAnalysisState &L,
        const TypeErasedDataflowAnalysisState &R) {
     return {AC.Analysis.joinTypeErased(L.Lattice, R.Lattice),
-            Environment::join(L.Env, R.Env, AC.Analysis, JoinBehavior)};
+            Environment::join(L.Env, R.Env, AC.Analysis)};
   }
 
 public:
-  JoinedStateBuilder(AnalysisContext &AC,
-                     Environment::ExprJoinBehavior JoinBehavior)
-      : AC(AC), JoinBehavior(JoinBehavior) {}
+  JoinedStateBuilder(AnalysisContext &AC) : AC(AC) {}
 
   void addOwned(TypeErasedDataflowAnalysisState State) {
     Owned.push_back(std::move(State));
@@ -251,12 +185,12 @@ class JoinedStateBuilder {
       // initialize the state of each basic block differently.
       return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()};
     if (All.size() == 1)
-      // Join the environment with itself so that we discard expression state if
-      // desired.
+      // Join the environment with itself so that we discard the entries from
+      // `ExprToLoc` and `ExprToVal`.
       // FIXME: We could consider writing special-case code for this that only
       // does the discarding, but it's not clear if this is worth it.
-      return {All[0]->Lattice, Environment::join(All[0]->Env, All[0]->Env,
-                                                 AC.Analysis, JoinBehavior)};
+      return {All[0]->Lattice,
+              Environment::join(All[0]->Env, All[0]->Env, AC.Analysis)};
 
     auto Result = join(*All[0], *All[1]);
     for (unsigned I = 2; I < All.size(); ++I)
@@ -310,22 +244,7 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
     }
   }
 
-  // If any of the predecessor blocks contains an expression consumed in a
-  // different block, we need to keep expression state.
-  // Note that in this case, we keep expression state for all predecessors,
-  // rather than only those predecessors that actually contain an expression
-  // consumed in a different block. While this is potentially suboptimal, it's
-  // actually likely, if we have control flow within a full expression, that
-  // all predecessors have expression state consumed in a different block.
-  Environment::ExprJoinBehavior JoinBehavior = Environment::DiscardExprState;
-  for (const CFGBlock *Pred : Preds) {
-    if (Pred && AC.CFCtx.containsExprConsumedInDifferentBlock(*Pred)) {
-      JoinBehavior = Environment::KeepExprState;
-      break;
-    }
-  }
-
-  JoinedStateBuilder Builder(AC, JoinBehavior);
+  JoinedStateBuilder Builder(AC);
   for (const CFGBlock *Pred : Preds) {
     // Skip if the `Block` is unreachable or control flow cannot get past it.
     if (!Pred || Pred->hasNoReturnElement())
@@ -337,26 +256,33 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
         AC.BlockStates[Pred->getBlockID()];
     if (!MaybePredState)
       continue;
+    const TypeErasedDataflowAnalysisState &PredState = *MaybePredState;
 
-    if (AC.Analysis.builtinOptions()) {
       if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
-        // We have a terminator: we need to mutate an environment to describe
-        // when the terminator is taken. Copy now.
-        TypeErasedDataflowAnalysisState Copy = MaybePredState->fork();
-
-        auto [Cond, CondValue] =
-            TerminatorVisitor(Copy.Env, blockIndexInPredecessor(*Pred, Block))
-                .Visit(PredTerminatorStmt);
-        if (Cond != nullptr)
-          // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts
-          // are not set.
-          AC.Analysis.transferBranchTypeErased(CondValue, Cond, Copy.Lattice,
+        bool BranchVal = blockIndexInPredecessor(*Pred, Block) == 0;
+        const Expr *Cond = TerminatorVisitor().Visit(PredTerminatorStmt);
+        if (Cond != nullptr) {
+          // `transferBranch` may need to mutate the environment to describe the
+          // dynamic effect of the terminator for a given branch.  Copy now.
+          TypeErasedDataflowAnalysisState Copy = MaybePredState->fork();
+          if (AC.Analysis.builtinOptions()) {
+            auto *CondVal = Copy.Env.get<BoolValue>(*Cond);
+            // In transferCFGBlock(), we ensure that we always have a `Value`
+            // for the terminator condition, so assert this. We consciously
+            // assert ourselves instead of asserting via `cast()` so that we get
+            // a more meaningful line number if the assertion fails.
+            assert(CondVal != nullptr);
+            BoolValue *AssertedVal =
+                BranchVal ? CondVal : &Copy.Env.makeNot(*CondVal);
+            Copy.Env.assume(AssertedVal->formula());
+          }
+          AC.Analysis.transferBranchTypeErased(BranchVal, Cond, Copy.Lattice,
                                                Copy.Env);
-        Builder.addOwned(std::move(Copy));
-        continue;
+          Builder.addOwned(std::move(Copy));
+          continue;
+        }
       }
-    }
-    Builder.addUnowned(*MaybePredState);
+      Builder.addUnowned(PredState);
   }
   return std::move(Builder).take();
 }
@@ -406,6 +332,7 @@ builtinTransferInitializer(const CFGInitializer &Elt,
     }
   }
   assert(Member != nullptr);
+  assert(MemberLoc != nullptr);
 
   // FIXME: Instead of these case distinctions, we would ideally want to be able
   // to simply use `Environment::createObject()` here, the same way that we do
@@ -421,7 +348,6 @@ builtinTransferInitializer(const CFGInitializer &Elt,
 
     ParentLoc->setChild(*Member, InitExprLoc);
   } else if (auto *InitExprVal = Env.getValue(*InitExpr)) {
-    assert(MemberLoc != nullptr);
     if (Member->getType()->isRecordType()) {
       auto *InitValStruct = cast<RecordValue>(InitExprVal);
       // FIXME: Rather than performing a copy here, we should really be
@@ -519,7 +445,8 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
     // we have *some* value for the condition expression. This ensures that
     // when we extend the flow condition, it actually changes.
     if (State.Env.getValue(*TerminatorCond) == nullptr)
-      State.Env.setValue(*TerminatorCond, State.Env.makeAtomicBoolValue());
+      State.Env.setValue(*TerminatorCond,
+                         bool_model::freshBoolValue(State.Env));
     AC.Log.recordState(State);
   }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/84499


More information about the cfe-commits mailing list