[clang] [clang][dataflow] Process terminator condition within `transferCFGBlock()`. (PR #78127)

via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 18 05:47:26 PST 2024


https://github.com/martinboehme updated https://github.com/llvm/llvm-project/pull/78127

>From d2fea007602cc4279a52e49db799aecd767dba70 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Thu, 18 Jan 2024 07:41:04 +0000
Subject: [PATCH 1/2] [clang][dataflow] Process terminator condition within
 `transferCFGBlock()`.

In particular, it's important that we create the "fallback" atomic at this point
(which we produce if the transfer function didn't produce a value for the
expression) so that it is placed in the correct environment.

Previously, we processed the terminator condition in the `TerminatorVisitor`,
which put the fallback atomic in a copy of the environment that is produced as
input for the _successor_ block, rather than the environment for the block
containing the expression for which we produce the fallback atomic.

As a result, we produce different fallback atomics every time we process the
successor block, and hence we don't have a consistent representation of the
terminator condition in the flow condition.

This patch includes a test (authored by ymand@) that fails without the fix.
---
 .../clang/Analysis/FlowSensitive/Transfer.h   | 13 +++-
 clang/lib/Analysis/FlowSensitive/Transfer.cpp |  2 +
 .../TypeErasedDataflowAnalysis.cpp            | 63 ++++++++++++-------
 .../Analysis/FlowSensitive/LoggerTest.cpp     |  1 +
 .../FlowSensitive/SignAnalysisTest.cpp        | 27 ++++++++
 .../Analysis/FlowSensitive/TransferTest.cpp   | 31 +++++++++
 6 files changed, 111 insertions(+), 26 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
index 58bb77c4905c54..0ec3173fd4a05a 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
@@ -25,10 +25,17 @@ namespace dataflow {
 /// Maps statements to the environments of basic blocks that contain them.
 class StmtToEnvMap {
 public:
+  // `CurBlock` is the block currently being processed, and `CurState` is the
+  // pending state currently associated with this block. These are supplied
+  // separately as the pending state for the current block may not yet be
+  // represented in `BlockToState`.
   StmtToEnvMap(const ControlFlowContext &CFCtx,
                llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>>
-                   BlockToState)
-      : CFCtx(CFCtx), BlockToState(BlockToState) {}
+                   BlockToState,
+               const CFGBlock &CurBlock,
+               const TypeErasedDataflowAnalysisState &CurState)
+      : CFCtx(CFCtx), BlockToState(BlockToState), CurBlock(CurBlock),
+        CurState(CurState) {}
 
   /// Returns the environment of the basic block that contains `S`.
   /// The result is guaranteed never to be null.
@@ -37,6 +44,8 @@ class StmtToEnvMap {
 private:
   const ControlFlowContext &CFCtx;
   llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockToState;
+  const CFGBlock &CurBlock;
+  const TypeErasedDataflowAnalysisState &CurState;
 };
 
 /// Evaluates `S` and updates `Env` accordingly.
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 55093c2e2cdaf0..750c579764ebea 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -42,6 +42,8 @@ const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const {
   assert(BlockIt != CFCtx.getStmtToBlock().end());
   if (!CFCtx.isBlockReachable(*BlockIt->getSecond()))
     return nullptr;
+  if (BlockIt->getSecond() == &CurBlock)
+    return &CurState.Env;
   const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
   if (!(State))
     return nullptr;
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index faf83a8920d4ea..5ee16123e76f28 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -75,9 +75,8 @@ using TerminatorVisitorRetTy = std::pair<const Expr *, bool>;
 class TerminatorVisitor
     : public ConstStmtVisitor<TerminatorVisitor, TerminatorVisitorRetTy> {
 public:
-  TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
-                    int BlockSuccIdx)
-      : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx) {}
+  TerminatorVisitor(Environment &Env, int BlockSuccIdx)
+      : Env(Env), BlockSuccIdx(BlockSuccIdx) {}
 
   TerminatorVisitorRetTy VisitIfStmt(const IfStmt *S) {
     auto *Cond = S->getCond();
@@ -126,19 +125,12 @@ class TerminatorVisitor
 
 private:
   TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) {
-    // The terminator sub-expression might not be evaluated.
-    if (Env.getValue(Cond) == nullptr)
-      transfer(StmtToEnv, Cond, Env);
-
     auto *Val = Env.get<BoolValue>(Cond);
-    // Value merging depends on flow conditions from different environments
-    // being mutually exclusive -- that is, they cannot both be true in their
-    // entirety (even if they may share some clauses). So, we need *some* value
-    // for the condition expression, even if just an atom.
-    if (Val == nullptr) {
-      Val = &Env.makeAtomicBoolValue();
-      Env.setValue(Cond, *Val);
-    }
+    // In transferCFGBlock(), we ensure that we always have a `Value` for the
+    // terminator condition, so assert this.
+    // We consciously assert ourselves instead of asserting via `cast()` so
+    // that we get a more meaningful line number if the assertion fails.
+    assert(Val != nullptr);
 
     bool ConditionValue = true;
     // The condition must be inverted for the successor that encompasses the
@@ -152,7 +144,6 @@ class TerminatorVisitor
     return {&Cond, ConditionValue};
   }
 
-  const StmtToEnvMap &StmtToEnv;
   Environment &Env;
   int BlockSuccIdx;
 };
@@ -335,10 +326,8 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
         // when the terminator is taken. Copy now.
         TypeErasedDataflowAnalysisState Copy = MaybePredState->fork();
 
-        const StmtToEnvMap StmtToEnv(AC.CFCtx, AC.BlockStates);
         auto [Cond, CondValue] =
-            TerminatorVisitor(StmtToEnv, Copy.Env,
-                              blockIndexInPredecessor(*Pred, Block))
+            TerminatorVisitor(Copy.Env, blockIndexInPredecessor(*Pred, Block))
                 .Visit(PredTerminatorStmt);
         if (Cond != nullptr)
           // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts
@@ -356,12 +345,13 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
 
 /// Built-in transfer function for `CFGStmt`.
 static void
-builtinTransferStatement(const CFGStmt &Elt,
+builtinTransferStatement(const CFGBlock &CurBlock, const CFGStmt &Elt,
                          TypeErasedDataflowAnalysisState &InputState,
                          AnalysisContext &AC) {
   const Stmt *S = Elt.getStmt();
   assert(S != nullptr);
-  transfer(StmtToEnvMap(AC.CFCtx, AC.BlockStates), *S, InputState.Env);
+  transfer(StmtToEnvMap(AC.CFCtx, AC.BlockStates, CurBlock, InputState), *S,
+           InputState.Env);
 }
 
 /// Built-in transfer function for `CFGInitializer`.
@@ -428,12 +418,12 @@ builtinTransferInitializer(const CFGInitializer &Elt,
   }
 }
 
-static void builtinTransfer(const CFGElement &Elt,
+static void builtinTransfer(const CFGBlock &CurBlock, const CFGElement &Elt,
                             TypeErasedDataflowAnalysisState &State,
                             AnalysisContext &AC) {
   switch (Elt.getKind()) {
   case CFGElement::Statement:
-    builtinTransferStatement(Elt.castAs<CFGStmt>(), State, AC);
+    builtinTransferStatement(CurBlock, Elt.castAs<CFGStmt>(), State, AC);
     break;
   case CFGElement::Initializer:
     builtinTransferInitializer(Elt.castAs<CFGInitializer>(), State);
@@ -477,7 +467,7 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
     AC.Log.enterElement(Element);
     // Built-in analysis
     if (AC.Analysis.builtinOptions()) {
-      builtinTransfer(Element, State, AC);
+      builtinTransfer(Block, Element, State, AC);
     }
 
     // User-provided analysis
@@ -489,6 +479,31 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
     }
     AC.Log.recordState(State);
   }
+
+  // If we have a terminator, evaluate its condition.
+  // This `Expr` may not appear as a `CFGElement` anywhere else, and it's
+  // important that we evaluate it here (rather than while processing the
+  // terminator) so that we put the corresponding value in the right
+  // environment.
+  if (const Expr *TerminatorCond =
+          dyn_cast_or_null<Expr>(Block.getTerminatorCondition())) {
+    if (State.Env.getValue(*TerminatorCond) == nullptr)
+      // FIXME: This only runs the builtin transfer, not the analysis-specific
+      // transfer. Fixing this isn't trivial, as the analysis-specific transfer
+      // takes a `CFGElement` as input, but some expressions only show up as a
+      // terminator condition, but not as a `CFGElement`. The condition of an if
+      // statement is one such example.
+      transfer(StmtToEnvMap(AC.CFCtx, AC.BlockStates, Block, State),
+               *TerminatorCond, State.Env);
+
+    // If the transfer function didn't produce a value, create an atom so that
+    // we have *some* value for the condition expression. This ensures that
+    // when we extend the flow condition, it actually changes.
+    if (State.Env.getValue(*TerminatorCond) == nullptr)
+      State.Env.setValue(*TerminatorCond, State.Env.makeAtomicBoolValue());
+    AC.Log.recordState(State);
+  }
+
   return State;
 }
 
diff --git a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
index a60dbe1f61f6d6..c5594aa3fe9d1f 100644
--- a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
@@ -123,6 +123,7 @@ recordState(Elements=1, Branches=0, Joins=0)
 enterElement(b (ImplicitCastExpr, LValueToRValue, _Bool))
 transfer()
 recordState(Elements=2, Branches=0, Joins=0)
+recordState(Elements=2, Branches=0, Joins=0)
 
 enterBlock(3, false)
 transferBranch(0)
diff --git a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
index b5fc7bbc431eae..a6f4c45504fa6d 100644
--- a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
@@ -895,6 +895,33 @@ TEST(SignAnalysisTest, BinaryEQ) {
       LangStandard::lang_cxx17);
 }
 
+TEST(SignAnalysisTest, ComplexLoopCondition) {
+  std::string Code = R"(
+    int foo();
+    void fun() {
+      int a, b;
+      while ((a = foo()) > 0 && (b = foo()) > 0) {
+        a;
+        b;
+        // [[p]]
+      }
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+        const ValueDecl *A = findValueDecl(ASTCtx, "a");
+        const ValueDecl *B = findValueDecl(ASTCtx, "b");
+
+        EXPECT_TRUE(isPositive(A, ASTCtx, Env));
+        EXPECT_TRUE(isPositive(B, ASTCtx, Env));
+      },
+      LangStandard::lang_cxx17);
+}
+
 TEST(SignAnalysisTest, JoinToTop) {
   std::string Code = R"(
     int foo();
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 056c4f3383d832..6d88e25f77c807 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -6408,4 +6408,35 @@ TEST(TransferTest, DifferentReferenceLocInJoin) {
       });
 }
 
+// This test verifies correct modeling of a relational dependency that goes
+// through unmodeled functions (the simple `cond()` in this case).
+TEST(TransferTest, ConditionalRelation) {
+  std::string Code = R"(
+    bool cond();
+    void target() {
+       bool a = true;
+       bool b = true;
+       if (cond()) {
+         a = false;
+         if (cond()) {
+           b = false;
+         }
+       }
+       (void)0;
+       // [[p]]
+    }
+ )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+        auto &A = Env.arena();
+        auto &VarA = getValueForDecl<BoolValue>(ASTCtx, Env, "a").formula();
+        auto &VarB = getValueForDecl<BoolValue>(ASTCtx, Env, "b").formula();
+
+        EXPECT_FALSE(Env.allows(A.makeAnd(VarA, A.makeNot(VarB))));
+      });
+}
+
 } // namespace

>From ee4c413a322aac2f9f2879ca51167feb908e5bc7 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Thu, 18 Jan 2024 13:47:02 +0000
Subject: [PATCH 2/2] fixup! [clang][dataflow] Process terminator condition
 within `transferCFGBlock()`.

---
 .../clang/Analysis/FlowSensitive/Transfer.h       | 14 +++++++-------
 clang/lib/Analysis/FlowSensitive/Transfer.cpp     |  2 +-
 .../FlowSensitive/TypeErasedDataflowAnalysis.cpp  | 15 ++++++++-------
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
index 0ec3173fd4a05a..7713df747cb76e 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
@@ -25,16 +25,16 @@ namespace dataflow {
 /// Maps statements to the environments of basic blocks that contain them.
 class StmtToEnvMap {
 public:
-  // `CurBlock` is the block currently being processed, and `CurState` is the
-  // pending state currently associated with this block. These are supplied
-  // separately as the pending state for the current block may not yet be
-  // represented in `BlockToState`.
+  // `CurBlockID` is the ID of the block currently being processed, and
+  // `CurState` is the pending state currently associated with this block. These
+  // are supplied separately as the pending state for the current block may not
+  // yet be represented in `BlockToState`.
   StmtToEnvMap(const ControlFlowContext &CFCtx,
                llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>>
                    BlockToState,
-               const CFGBlock &CurBlock,
+               unsigned CurBlockID,
                const TypeErasedDataflowAnalysisState &CurState)
-      : CFCtx(CFCtx), BlockToState(BlockToState), CurBlock(CurBlock),
+      : CFCtx(CFCtx), BlockToState(BlockToState), CurBlockID(CurBlockID),
         CurState(CurState) {}
 
   /// Returns the environment of the basic block that contains `S`.
@@ -44,7 +44,7 @@ class StmtToEnvMap {
 private:
   const ControlFlowContext &CFCtx;
   llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockToState;
-  const CFGBlock &CurBlock;
+  unsigned CurBlockID;
   const TypeErasedDataflowAnalysisState &CurState;
 };
 
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 750c579764ebea..2271a75fbcaf70 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -42,7 +42,7 @@ const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const {
   assert(BlockIt != CFCtx.getStmtToBlock().end());
   if (!CFCtx.isBlockReachable(*BlockIt->getSecond()))
     return nullptr;
-  if (BlockIt->getSecond() == &CurBlock)
+  if (BlockIt->getSecond()->getBlockID() == CurBlockID)
     return &CurState.Env;
   const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
   if (!(State))
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 5ee16123e76f28..0c68c291baac14 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -345,12 +345,12 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
 
 /// Built-in transfer function for `CFGStmt`.
 static void
-builtinTransferStatement(const CFGBlock &CurBlock, const CFGStmt &Elt,
+builtinTransferStatement(unsigned CurBlockID, const CFGStmt &Elt,
                          TypeErasedDataflowAnalysisState &InputState,
                          AnalysisContext &AC) {
   const Stmt *S = Elt.getStmt();
   assert(S != nullptr);
-  transfer(StmtToEnvMap(AC.CFCtx, AC.BlockStates, CurBlock, InputState), *S,
+  transfer(StmtToEnvMap(AC.CFCtx, AC.BlockStates, CurBlockID, InputState), *S,
            InputState.Env);
 }
 
@@ -418,12 +418,12 @@ builtinTransferInitializer(const CFGInitializer &Elt,
   }
 }
 
-static void builtinTransfer(const CFGBlock &CurBlock, const CFGElement &Elt,
+static void builtinTransfer(unsigned CurBlockID, const CFGElement &Elt,
                             TypeErasedDataflowAnalysisState &State,
                             AnalysisContext &AC) {
   switch (Elt.getKind()) {
   case CFGElement::Statement:
-    builtinTransferStatement(CurBlock, Elt.castAs<CFGStmt>(), State, AC);
+    builtinTransferStatement(CurBlockID, Elt.castAs<CFGStmt>(), State, AC);
     break;
   case CFGElement::Initializer:
     builtinTransferInitializer(Elt.castAs<CFGInitializer>(), State);
@@ -467,7 +467,7 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
     AC.Log.enterElement(Element);
     // Built-in analysis
     if (AC.Analysis.builtinOptions()) {
-      builtinTransfer(Block, Element, State, AC);
+      builtinTransfer(Block.getBlockID(), Element, State, AC);
     }
 
     // User-provided analysis
@@ -493,8 +493,9 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
       // takes a `CFGElement` as input, but some expressions only show up as a
       // terminator condition, but not as a `CFGElement`. The condition of an if
       // statement is one such example.
-      transfer(StmtToEnvMap(AC.CFCtx, AC.BlockStates, Block, State),
-               *TerminatorCond, State.Env);
+      transfer(
+          StmtToEnvMap(AC.CFCtx, AC.BlockStates, Block.getBlockID(), State),
+          *TerminatorCond, State.Env);
 
     // If the transfer function didn't produce a value, create an atom so that
     // we have *some* value for the condition expression. This ensures that



More information about the cfe-commits mailing list