[clang] 8dcaf3a - [clang][dataflow] Implement a basic algorithm for dataflow analysis

Dmitri Gribenko via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 10 02:44:56 PST 2021


Author: Stanislav Gatev
Date: 2021-12-10T11:44:49+01:00
New Revision: 8dcaf3aa0bf25508700a7452ed963c1487221dfd

URL: https://github.com/llvm/llvm-project/commit/8dcaf3aa0bf25508700a7452ed963c1487221dfd
DIFF: https://github.com/llvm/llvm-project/commit/8dcaf3aa0bf25508700a7452ed963c1487221dfd.diff

LOG: [clang][dataflow] Implement a basic algorithm for dataflow analysis

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed By: xazax.hun, gribozavr2

Differential Revision: https://reviews.llvm.org/D115235

Added: 
    clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
    clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp

Modified: 
    clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
    clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h
    clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
    clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
    clang/unittests/Analysis/CMakeLists.txt

Removed: 
    


################################################################################
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 69a5c2e47b66d..4a3c0239f8e12 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -15,11 +15,20 @@
 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
 
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+
 namespace clang {
 namespace dataflow {
 
 /// Holds the state of the program (store and heap) at a given program point.
-class Environment {};
+class Environment {
+public:
+  bool operator==(const Environment &) const { return true; }
+
+  LatticeJoinEffect join(const Environment &) {
+    return LatticeJoinEffect::Unchanged;
+  }
+};
 
 } // namespace dataflow
 } // namespace clang

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h b/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h
index 90095735ad3d5..52d84eb13c568 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h
@@ -61,11 +61,12 @@ struct ReversePostOrderCompare {
 /// the same block multiple times at once.
 struct ForwardDataflowWorklist
     : DataflowWorklistBase<ReversePostOrderCompare, 20> {
+  ForwardDataflowWorklist(const CFG &Cfg, PostOrderCFGView *POV)
+      : DataflowWorklistBase(Cfg, POV,
+                             ReversePostOrderCompare{POV->getComparator()}) {}
+
   ForwardDataflowWorklist(const CFG &Cfg, AnalysisDeclContext &Ctx)
-      : DataflowWorklistBase(
-            Cfg, Ctx.getAnalysis<PostOrderCFGView>(),
-            ReversePostOrderCompare{
-                Ctx.getAnalysis<PostOrderCFGView>()->getComparator()}) {}
+      : ForwardDataflowWorklist(Cfg, Ctx.getAnalysis<PostOrderCFGView>()) {}
 
   void enqueueSuccessors(const CFGBlock *Block) {
     for (auto B : Block->succs())

diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
index 9448b911f4718..55fae246da795 100644
--- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -76,6 +76,20 @@ struct TypeErasedDataflowAnalysisState {
   Environment Env;
 };
 
+/// Transfers the state of a basic block by evaluating each of its statements in
+/// the context of `Analysis` and the states of its predecessors that are
+/// available in `BlockStates`.
+///
+/// Requirements:
+///
+///   All predecessors of `Block` except those with loop back edges must have
+///   already been transferred. States in `BlockStates` that are set to
+///   `llvm::None` represent basic blocks that are not evaluated yet.
+TypeErasedDataflowAnalysisState transferBlock(
+    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
+    const CFGBlock &Block, const Environment &InitEnv,
+    TypeErasedDataflowAnalysis &Analysis);
+
 /// Performs dataflow analysis and returns a mapping from basic block IDs to
 /// dataflow analysis states that model the respective basic blocks. Indices
 /// of the returned vector correspond to basic block IDs.

diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index bb7eb99710681..45afd59728e14 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -11,15 +11,77 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <utility>
 #include <vector>
 
+#include "clang/Analysis/Analyses/PostOrderCFGView.h"
 #include "clang/Analysis/CFG.h"
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/Support/raw_ostream.h"
 
-using namespace clang;
-using namespace dataflow;
+namespace clang {
+namespace dataflow {
+
+/// Computes the input state for a given basic block by joining the output
+/// states of its predecessors.
+///
+/// Requirements:
+///
+///   All predecessors of `Block` except those with loop back edges must have
+///   already been transferred. States in `BlockStates` that are set to
+///   `llvm::None` represent basic blocks that are not evaluated yet.
+static TypeErasedDataflowAnalysisState computeBlockInputState(
+    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
+    const CFGBlock &Block, const Environment &InitEnv,
+    TypeErasedDataflowAnalysis &Analysis) {
+  // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
+  // to enable building analyses like computation of dominators that initialize
+  // the state of each basic block differently.
+  TypeErasedDataflowAnalysisState State = {Analysis.typeErasedInitialElement(),
+                                           InitEnv};
+  for (const CFGBlock *Pred : Block.preds()) {
+    // Skip if `Pred` is unreachable or control flow cannot get past it.
+    if (!Pred || Pred->hasNoReturnElement())
+      continue;
+
+    // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
+    // loop back edge to `Block`.
+    const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState =
+        BlockStates[Pred->getBlockID()];
+    if (!MaybePredState.hasValue())
+      continue;
+
+    const TypeErasedDataflowAnalysisState &PredState =
+        MaybePredState.getValue();
+    Analysis.joinTypeErased(State.Lattice, PredState.Lattice);
+    State.Env.join(PredState.Env);
+  }
+  return State;
+}
+
+TypeErasedDataflowAnalysisState transferBlock(
+    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
+    const CFGBlock &Block, const Environment &InitEnv,
+    TypeErasedDataflowAnalysis &Analysis) {
+  TypeErasedDataflowAnalysisState State =
+      computeBlockInputState(BlockStates, Block, InitEnv, Analysis);
+  for (const CFGElement &Element : Block) {
+    // FIXME: Evaluate other kinds of `CFGElement`.
+    const llvm::Optional<CFGStmt> Stmt = Element.getAs<CFGStmt>();
+    if (!Stmt.hasValue())
+      continue;
+
+    // FIXME: Evaluate the statement contained in `Stmt`.
+
+    State.Lattice = Analysis.transferTypeErased(Stmt.getValue().getStmt(),
+                                                State.Lattice, State.Env);
+  }
+  return State;
+}
 
 std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>
 runTypeErasedDataflowAnalysis(const CFG &Cfg,
@@ -29,7 +91,59 @@ runTypeErasedDataflowAnalysis(const CFG &Cfg,
   // are specified in the header. This could be done by remembering
   // what options were used to build `Cfg` and asserting on them here.
 
-  // FIXME: Implement work list-based algorithm to compute the fixed
-  // point of `Analysis::transform` for every basic block in `Cfg`.
-  return {};
+  PostOrderCFGView POV(&Cfg);
+  ForwardDataflowWorklist Worklist(Cfg, &POV);
+
+  std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates;
+  BlockStates.resize(Cfg.size(), llvm::None);
+
+  // The entry basic block doesn't contain statements so it can be skipped.
+  const CFGBlock &Entry = Cfg.getEntry();
+  BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
+                                     InitEnv};
+  Worklist.enqueueSuccessors(&Entry);
+
+  // Bugs in lattices and transfer functions can prevent the analysis from
+  // converging. To limit the damage (infinite loops) that these bugs can cause,
+  // limit the number of iterations.
+  // FIXME: Consider making the maximum number of iterations configurable.
+  // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
+  // of iterations, number of functions that time out, etc.
+  unsigned Iterations = 0;
+  static constexpr unsigned MaxIterations = 1 << 16;
+  while (const CFGBlock *Block = Worklist.dequeue()) {
+    if (++Iterations > MaxIterations) {
+      llvm::errs() << "Maximum number of iterations reached, giving up.\n";
+      break;
+    }
+
+    const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState =
+        BlockStates[Block->getBlockID()];
+    TypeErasedDataflowAnalysisState NewBlockState =
+        transferBlock(BlockStates, *Block, InitEnv, Analysis);
+
+    if (OldBlockState.hasValue() &&
+        Analysis.isEqualTypeErased(OldBlockState.getValue().Lattice,
+                                   NewBlockState.Lattice) &&
+        OldBlockState->Env == NewBlockState.Env) {
+      // The state of `Block` didn't change after transfer so there's no need to
+      // revisit its successors.
+      continue;
+    }
+
+    BlockStates[Block->getBlockID()] = std::move(NewBlockState);
+
+    // Do not add unreachable successor blocks to `Worklist`.
+    if (Block->hasNoReturnElement())
+      continue;
+
+    Worklist.enqueueSuccessors(Block);
+  }
+  // FIXME: Consider evaluating unreachable basic blocks (those that have a
+  // state set to `llvm::None` at this point) to also analyze dead code.
+
+  return BlockStates;
 }
+
+} // namespace dataflow
+} // namespace clang

diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt
index 00026874417bb..7e2a00b96057a 100644
--- a/clang/unittests/Analysis/CMakeLists.txt
+++ b/clang/unittests/Analysis/CMakeLists.txt
@@ -28,3 +28,5 @@ target_link_libraries(ClangAnalysisTests
   PRIVATE
   LLVMTestingSupport
   )
+
+add_subdirectory(FlowSensitive)

diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
new file mode 100644
index 0000000000000..d463e31bea3bd
--- /dev/null
+++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
@@ -0,0 +1,20 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_clang_unittest(ClangAnalysisFlowSensitiveTests
+  TypeErasedDataflowAnalysisTest.cpp
+  )
+
+clang_target_link_libraries(ClangAnalysisFlowSensitiveTests
+  PRIVATE
+  clangAnalysis
+  clangAnalysisFlowSensitive
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangFrontend
+  clangTesting
+  clangTooling
+  )
+

diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
new file mode 100644
index 0000000000000..61765eb1404a2
--- /dev/null
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -0,0 +1,148 @@
+//===- unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/Decl.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/StringRef.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <cassert>
+#include <memory>
+#include <vector>
+
+using namespace clang;
+using namespace dataflow;
+
+template <typename AnalysisT>
+class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback {
+public:
+  void run(const ast_matchers::MatchFinder::MatchResult &Result) override {
+    assert(BlockStates.empty());
+
+    const auto *Func = Result.Nodes.getNodeAs<FunctionDecl>("func");
+    assert(Func != nullptr);
+
+    Stmt *Body = Func->getBody();
+    assert(Body != nullptr);
+
+    // FIXME: Consider providing a utility that returns a `CFG::BuildOptions`
+    // which is a good default for most clients or a utility that directly
+    // builds the `CFG` using default `CFG::BuildOptions`.
+    CFG::BuildOptions Options;
+    Options.AddImplicitDtors = true;
+    Options.AddTemporaryDtors = true;
+    Options.setAllAlwaysAdd();
+
+    std::unique_ptr<CFG> Cfg =
+        CFG::buildCFG(nullptr, Body, Result.Context, Options);
+    assert(Cfg != nullptr);
+
+    AnalysisT Analysis(*Result.Context);
+    Environment Env;
+    BlockStates = runDataflowAnalysis(*Cfg, Analysis, Env);
+  }
+
+  std::vector<
+      llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
+      BlockStates;
+};
+
+template <typename AnalysisT>
+std::vector<llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
+runAnalysis(llvm::StringRef Code) {
+  std::unique_ptr<ASTUnit> AST =
+      tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
+
+  AnalysisCallback<AnalysisT> Callback;
+  ast_matchers::MatchFinder Finder;
+  Finder.addMatcher(
+      ast_matchers::functionDecl(ast_matchers::hasName("target")).bind("func"),
+      &Callback);
+  Finder.matchAST(AST->getASTContext());
+
+  return Callback.BlockStates;
+}
+
+class NoopLattice {
+public:
+  bool operator==(const NoopLattice &) const { return true; }
+
+  LatticeJoinEffect join(const NoopLattice &) {
+    return LatticeJoinEffect::Unchanged;
+  }
+};
+
+class NoopAnalysis : public DataflowAnalysis<NoopAnalysis, NoopLattice> {
+public:
+  NoopAnalysis(ASTContext &Context)
+      : DataflowAnalysis<NoopAnalysis, NoopLattice>(Context) {}
+
+  static NoopLattice initialElement() { return {}; }
+
+  NoopLattice transfer(const Stmt *S, const NoopLattice &E, Environment &Env) {
+    return {};
+  }
+};
+
+TEST(DataflowAnalysisTest, NoopAnalysis) {
+  auto BlockStates = runAnalysis<NoopAnalysis>(R"(
+    void target() {}
+  )");
+  EXPECT_EQ(BlockStates.size(), 2u);
+  EXPECT_TRUE(BlockStates[0].hasValue());
+  EXPECT_TRUE(BlockStates[1].hasValue());
+}
+
+struct NonConvergingLattice {
+  int State;
+
+  bool operator==(const NonConvergingLattice &Other) const {
+    return State == Other.State;
+  }
+
+  LatticeJoinEffect join(const NonConvergingLattice &Other) {
+    if (Other.State == 0)
+      return LatticeJoinEffect::Unchanged;
+    State += Other.State;
+    return LatticeJoinEffect::Changed;
+  }
+};
+
+class NonConvergingAnalysis
+    : public DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice> {
+public:
+  explicit NonConvergingAnalysis(ASTContext &Context)
+      : DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice>(Context) {
+  }
+
+  static NonConvergingLattice initialElement() { return {0}; }
+
+  NonConvergingLattice transfer(const Stmt *S, const NonConvergingLattice &E,
+                                Environment &Env) {
+    return {E.State + 1};
+  }
+};
+
+TEST(DataflowAnalysisTest, NonConvergingAnalysis) {
+  auto BlockStates = runAnalysis<NonConvergingAnalysis>(R"(
+    void target() {
+      while(true) {}
+    }
+  )");
+  EXPECT_EQ(BlockStates.size(), 4u);
+  EXPECT_FALSE(BlockStates[0].hasValue());
+  EXPECT_TRUE(BlockStates[1].hasValue());
+  EXPECT_TRUE(BlockStates[2].hasValue());
+  EXPECT_TRUE(BlockStates[3].hasValue());
+}


        


More information about the cfe-commits mailing list