[clang] [clang][dataflow] Fix bug in `buildContainsExprConsumedInDifferentBlock()`. (PR #100874)

via cfe-commits cfe-commits at lists.llvm.org
Sat Jul 27 06:48:48 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (martinboehme)

<details>
<summary>Changes</summary>

This was missing a call to `ignoreCFGOmittedNodes()`. As a result, the function
would erroneously conclude that a block did not contain an expression consumed
in a different block if the expression in question was surrounded by a
`ParenExpr` in the consuming block. The patch adds a test that triggers this
scenario (and fails without the fix).

To prevent this kind of bug in the future, the patch also adds a new method
`blockForStmt()` to `AdornedCFG` that calls `ignoreCFGOmittedNodes()` and is
preferred over accessing `getStmtToBlock()` directly.


---
Full diff: https://github.com/llvm/llvm-project/pull/100874.diff


5 Files Affected:

- (modified) clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h (+31-4) 
- (modified) clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp (+11-5) 
- (modified) clang/lib/Analysis/FlowSensitive/Transfer.cpp (+5-6) 
- (modified) clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp (+5-4) 
- (modified) clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp (+38-1) 


``````````diff
diff --git a/clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h b/clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h
index 420f13ce11bfd..5de66fcb0e3af 100644
--- a/clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h
+++ b/clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h
@@ -18,6 +18,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Error.h"
@@ -27,6 +28,24 @@
 namespace clang {
 namespace dataflow {
 
+namespace internal {
+class StmtToBlockMap {
+public:
+  StmtToBlockMap(const CFG &Cfg);
+
+  const CFGBlock *lookup(const Stmt &S) const {
+    return StmtToBlock.lookup(&ignoreCFGOmittedNodes(S));
+  }
+
+  const llvm::DenseMap<const Stmt *, const CFGBlock *> &getMap() const {
+    return StmtToBlock;
+  }
+
+private:
+  llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock;
+};
+} // namespace internal
+
 /// Holds CFG with additional information derived from it that is needed to
 /// perform dataflow analysis.
 class AdornedCFG {
@@ -49,8 +68,17 @@ class AdornedCFG {
   const CFG &getCFG() const { return *Cfg; }
 
   /// Returns a mapping from statements to basic blocks that contain them.
+  /// Deprecated. Use `blockForStmt()` instead (which prevents the potential
+  /// error of forgetting to call `ignoreCFGOmittedNodes()` on the statement to
+  /// look up).
   const llvm::DenseMap<const Stmt *, const CFGBlock *> &getStmtToBlock() const {
-    return StmtToBlock;
+    return StmtToBlock.getMap();
+  }
+
+  /// Returns the basic block that contains `S`, or null if no basic block
+  /// containing `S` is found.
+  const CFGBlock *blockForStmt(const Stmt &S) const {
+    return StmtToBlock.lookup(S);
   }
 
   /// Returns whether `B` is reachable from the entry block.
@@ -73,8 +101,7 @@ class AdornedCFG {
 private:
   AdornedCFG(
       const Decl &D, std::unique_ptr<CFG> Cfg,
-      llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock,
-      llvm::BitVector BlockReachable,
+      internal::StmtToBlockMap StmtToBlock, llvm::BitVector BlockReachable,
       llvm::DenseSet<const CFGBlock *> ContainsExprConsumedInDifferentBlock)
       : ContainingDecl(D), Cfg(std::move(Cfg)),
         StmtToBlock(std::move(StmtToBlock)),
@@ -85,7 +112,7 @@ class AdornedCFG {
   /// The `Decl` containing the statement used to construct the CFG.
   const Decl &ContainingDecl;
   std::unique_ptr<CFG> Cfg;
-  llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock;
+  internal::StmtToBlockMap StmtToBlock;
   llvm::BitVector BlockReachable;
   llvm::DenseSet<const CFGBlock *> ContainsExprConsumedInDifferentBlock;
 };
diff --git a/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp b/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp
index 255543021a998..876b5a3db5249 100644
--- a/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp
+++ b/clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Error.h"
@@ -96,8 +97,7 @@ static llvm::BitVector findReachableBlocks(const CFG &Cfg) {
 
 static llvm::DenseSet<const CFGBlock *>
 buildContainsExprConsumedInDifferentBlock(
-    const CFG &Cfg,
-    const llvm::DenseMap<const Stmt *, const CFGBlock *> &StmtToBlock) {
+    const CFG &Cfg, const internal::StmtToBlockMap &StmtToBlock) {
   llvm::DenseSet<const CFGBlock *> Result;
 
   auto CheckChildExprs = [&Result, &StmtToBlock](const Stmt *S,
@@ -105,7 +105,7 @@ buildContainsExprConsumedInDifferentBlock(
     for (const Stmt *Child : S->children()) {
       if (!isa_and_nonnull<Expr>(Child))
         continue;
-      const CFGBlock *ChildBlock = StmtToBlock.lookup(Child);
+      const CFGBlock *ChildBlock = StmtToBlock.lookup(*Child);
       if (ChildBlock != Block)
         Result.insert(ChildBlock);
     }
@@ -126,6 +126,13 @@ buildContainsExprConsumedInDifferentBlock(
   return Result;
 }
 
+namespace internal {
+
+StmtToBlockMap::StmtToBlockMap(const CFG &Cfg)
+    : StmtToBlock(buildStmtToBasicBlockMap(Cfg)) {}
+
+} // namespace internal
+
 llvm::Expected<AdornedCFG> AdornedCFG::build(const FunctionDecl &Func) {
   if (!Func.doesThisDeclarationHaveABody())
     return llvm::createStringError(
@@ -166,8 +173,7 @@ llvm::Expected<AdornedCFG> AdornedCFG::build(const Decl &D, Stmt &S,
         std::make_error_code(std::errc::invalid_argument),
         "CFG::buildCFG failed");
 
-  llvm::DenseMap<const Stmt *, const CFGBlock *> StmtToBlock =
-      buildStmtToBasicBlockMap(*Cfg);
+  internal::StmtToBlockMap StmtToBlock(*Cfg);
 
   llvm::BitVector BlockReachable = findReachableBlocks(*Cfg);
 
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 3c896d373a211..9c54eb16d2224 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -40,17 +40,16 @@ namespace clang {
 namespace dataflow {
 
 const Environment *StmtToEnvMap::getEnvironment(const Stmt &S) const {
-  auto BlockIt = ACFG.getStmtToBlock().find(&ignoreCFGOmittedNodes(S));
-  if (BlockIt == ACFG.getStmtToBlock().end()) {
+  const CFGBlock *Block = ACFG.blockForStmt(S);
+  if (Block == nullptr) {
     assert(false);
-    // Return null to avoid dereferencing the end iterator in non-assert builds.
     return nullptr;
   }
-  if (!ACFG.isBlockReachable(*BlockIt->getSecond()))
+  if (!ACFG.isBlockReachable(*Block))
     return nullptr;
-  if (BlockIt->getSecond()->getBlockID() == CurBlockID)
+  if (Block->getBlockID() == CurBlockID)
     return &CurState.Env;
-  const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
+  const auto &State = BlockToState[Block->getBlockID()];
   if (!(State))
     return nullptr;
   return &State->Env;
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 200682faafd6a..8afd18b315d28 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -243,10 +243,11 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
     // See `NoreturnDestructorTest` for concrete examples.
     if (Block.succ_begin()->getReachableBlock() != nullptr &&
         Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
-      auto &StmtToBlock = AC.ACFG.getStmtToBlock();
-      auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt());
-      assert(StmtBlock != StmtToBlock.end());
-      llvm::erase(Preds, StmtBlock->getSecond());
+      const CFGBlock *StmtBlock = nullptr;
+      if (const Stmt *Terminator = Block.getTerminatorStmt())
+        StmtBlock = AC.ACFG.blockForStmt(*Terminator);
+      assert(StmtBlock != nullptr);
+      llvm::erase(Preds, StmtBlock);
     }
   }
 
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index 1a52b82d65665..8717d9753d161 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -9,6 +9,7 @@
 #include "TestingSupport.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/OperationKinds.h"
 #include "clang/AST/Type.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
@@ -79,7 +80,7 @@ class DataflowAnalysisTest : public Test {
 
   /// Returns the `CFGBlock` containing `S` (and asserts that it exists).
   const CFGBlock *blockForStmt(const Stmt &S) {
-    const CFGBlock *Block = ACFG->getStmtToBlock().lookup(&S);
+    const CFGBlock *Block = ACFG->blockForStmt(S);
     assert(Block != nullptr);
     return Block;
   }
@@ -370,6 +371,42 @@ TEST_F(DiscardExprStateTest, ConditionalOperator) {
   EXPECT_EQ(CallGState.Env.get<PointerValue>(AddrOfI), nullptr);
 }
 
+TEST_F(DiscardExprStateTest, CallWithParenExprTreatedCorrectly) {
+  // This is a regression test.
+  // In the CFG for `target()` below, the expression that evaluates the function
+  // pointer for `expect` and the actual call are separated into different
+  // baseic blocks (because of the control flow introduced by the `||`
+  // operator).
+  // The value for the `expect` function pointer was erroneously discarded
+  // from the environment between these two blocks because the code that
+  // determines whether the expression values for a block need to be preserved
+  // did not ignore the `ParenExpr` around `(i == 1)` (which is not represented
+  // in the CFG).
+  std::string Code = R"(
+    bool expect(bool, bool);
+    void target(int i) {
+      expect(false || (i == 1), false);
+    }
+  )";
+  auto BlockStates = llvm::cantFail(runAnalysis<NoopAnalysis>(
+      Code, [](ASTContext &C) { return NoopAnalysis(C); }));
+
+  const auto &FnToPtrDecay = matchNode<ImplicitCastExpr>(
+      implicitCastExpr(hasCastKind(CK_FunctionToPointerDecay)));
+  const auto &CallExpect =
+      matchNode<CallExpr>(callExpr(callee(functionDecl(hasName("expect")))));
+
+  // In the block that evaluates the implicit cast of `expect` to a pointer,
+  // this expression is associated with a value.
+  const auto &FnToPtrDecayState = blockStateForStmt(BlockStates, FnToPtrDecay);
+  EXPECT_NE(FnToPtrDecayState.Env.getValue(FnToPtrDecay), nullptr);
+
+  // In the block that calls `expect()`, the implicit cast of `expect` to a
+  // pointer is still associated with a value.
+  const auto &CallExpectState = blockStateForStmt(BlockStates, CallExpect);
+  EXPECT_NE(CallExpectState.Env.getValue(FnToPtrDecay), nullptr);
+}
+
 struct NonConvergingLattice {
   int State;
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/100874


More information about the cfe-commits mailing list