[clang] 8dcaf3a - [clang][dataflow] Implement a basic algorithm for dataflow analysis
Dmitri Gribenko via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 10 02:44:56 PST 2021
Author: Stanislav Gatev
Date: 2021-12-10T11:44:49+01:00
New Revision: 8dcaf3aa0bf25508700a7452ed963c1487221dfd
URL: https://github.com/llvm/llvm-project/commit/8dcaf3aa0bf25508700a7452ed963c1487221dfd
DIFF: https://github.com/llvm/llvm-project/commit/8dcaf3aa0bf25508700a7452ed963c1487221dfd.diff
LOG: [clang][dataflow] Implement a basic algorithm for dataflow analysis
This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.
Reviewed By: xazax.hun, gribozavr2
Differential Revision: https://reviews.llvm.org/D115235
Added:
clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
Modified:
clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h
clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
clang/unittests/Analysis/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 69a5c2e47b66d..4a3c0239f8e12 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -15,11 +15,20 @@
#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+
namespace clang {
namespace dataflow {
/// Holds the state of the program (store and heap) at a given program point.
-class Environment {};
+class Environment {
+public:
+ bool operator==(const Environment &) const { return true; }
+
+ LatticeJoinEffect join(const Environment &) {
+ return LatticeJoinEffect::Unchanged;
+ }
+};
} // namespace dataflow
} // namespace clang
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h b/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h
index 90095735ad3d5..52d84eb13c568 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h
@@ -61,11 +61,12 @@ struct ReversePostOrderCompare {
/// the same block multiple times at once.
struct ForwardDataflowWorklist
: DataflowWorklistBase<ReversePostOrderCompare, 20> {
+ ForwardDataflowWorklist(const CFG &Cfg, PostOrderCFGView *POV)
+ : DataflowWorklistBase(Cfg, POV,
+ ReversePostOrderCompare{POV->getComparator()}) {}
+
ForwardDataflowWorklist(const CFG &Cfg, AnalysisDeclContext &Ctx)
- : DataflowWorklistBase(
- Cfg, Ctx.getAnalysis<PostOrderCFGView>(),
- ReversePostOrderCompare{
- Ctx.getAnalysis<PostOrderCFGView>()->getComparator()}) {}
+ : ForwardDataflowWorklist(Cfg, Ctx.getAnalysis<PostOrderCFGView>()) {}
void enqueueSuccessors(const CFGBlock *Block) {
for (auto B : Block->succs())
diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
index 9448b911f4718..55fae246da795 100644
--- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -76,6 +76,20 @@ struct TypeErasedDataflowAnalysisState {
Environment Env;
};
+/// Transfers the state of a basic block by evaluating each of its statements in
+/// the context of `Analysis` and the states of its predecessors that are
+/// available in `BlockStates`.
+///
+/// Requirements:
+///
+/// All predecessors of `Block` except those with loop back edges must have
+/// already been transferred. States in `BlockStates` that are set to
+/// `llvm::None` represent basic blocks that are not evaluated yet.
+TypeErasedDataflowAnalysisState transferBlock(
+ std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
+ const CFGBlock &Block, const Environment &InitEnv,
+ TypeErasedDataflowAnalysis &Analysis);
+
/// Performs dataflow analysis and returns a mapping from basic block IDs to
/// dataflow analysis states that model the respective basic blocks. Indices
/// of the returned vector correspond to basic block IDs.
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index bb7eb99710681..45afd59728e14 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -11,15 +11,77 @@
//
//===----------------------------------------------------------------------===//
+#include <utility>
#include <vector>
+#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/Support/raw_ostream.h"
-using namespace clang;
-using namespace dataflow;
+namespace clang {
+namespace dataflow {
+
+/// Computes the input state for a given basic block by joining the output
+/// states of its predecessors.
+///
+/// Requirements:
+///
+/// All predecessors of `Block` except those with loop back edges must have
+/// already been transferred. States in `BlockStates` that are set to
+/// `llvm::None` represent basic blocks that are not evaluated yet.
+static TypeErasedDataflowAnalysisState computeBlockInputState(
+ std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
+ const CFGBlock &Block, const Environment &InitEnv,
+ TypeErasedDataflowAnalysis &Analysis) {
+ // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
+ // to enable building analyses like computation of dominators that initialize
+ // the state of each basic block
diff erently.
+ TypeErasedDataflowAnalysisState State = {Analysis.typeErasedInitialElement(),
+ InitEnv};
+ for (const CFGBlock *Pred : Block.preds()) {
+ // Skip if the `Block` is unreachable or control flow cannot get past it.
+ if (!Pred || Pred->hasNoReturnElement())
+ continue;
+
+ // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
+ // loop back edge to `Block`.
+ const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState =
+ BlockStates[Pred->getBlockID()];
+ if (!MaybePredState.hasValue())
+ continue;
+
+ const TypeErasedDataflowAnalysisState &PredState =
+ MaybePredState.getValue();
+ Analysis.joinTypeErased(State.Lattice, PredState.Lattice);
+ State.Env.join(PredState.Env);
+ }
+ return State;
+}
+
+TypeErasedDataflowAnalysisState transferBlock(
+ std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
+ const CFGBlock &Block, const Environment &InitEnv,
+ TypeErasedDataflowAnalysis &Analysis) {
+ TypeErasedDataflowAnalysisState State =
+ computeBlockInputState(BlockStates, Block, InitEnv, Analysis);
+ for (const CFGElement &Element : Block) {
+ // FIXME: Evaluate other kinds of `CFGElement`.
+ const llvm::Optional<CFGStmt> Stmt = Element.getAs<CFGStmt>();
+ if (!Stmt.hasValue())
+ continue;
+
+ // FIXME: Evaluate the statement contained in `Stmt`.
+
+ State.Lattice = Analysis.transferTypeErased(Stmt.getValue().getStmt(),
+ State.Lattice, State.Env);
+ }
+ return State;
+}
std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>
runTypeErasedDataflowAnalysis(const CFG &Cfg,
@@ -29,7 +91,59 @@ runTypeErasedDataflowAnalysis(const CFG &Cfg,
// are specified in the header. This could be done by remembering
// what options were used to build `Cfg` and asserting on them here.
- // FIXME: Implement work list-based algorithm to compute the fixed
- // point of `Analysis::transform` for every basic block in `Cfg`.
- return {};
+ PostOrderCFGView POV(&Cfg);
+ ForwardDataflowWorklist Worklist(Cfg, &POV);
+
+ std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates;
+ BlockStates.resize(Cfg.size(), llvm::None);
+
+ // The entry basic block doesn't contain statements so it can be skipped.
+ const CFGBlock &Entry = Cfg.getEntry();
+ BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
+ InitEnv};
+ Worklist.enqueueSuccessors(&Entry);
+
+ // Bugs in lattices and transfer functions can prevent the analysis from
+ // converging. To limit the damage (infinite loops) that these bugs can cause,
+ // limit the number of iterations.
+ // FIXME: Consider making the maximum number of iterations configurable.
+ // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
+ // of iterations, number of functions that time out, etc.
+ unsigned Iterations = 0;
+ static constexpr unsigned MaxIterations = 1 << 16;
+ while (const CFGBlock *Block = Worklist.dequeue()) {
+ if (++Iterations > MaxIterations) {
+ llvm::errs() << "Maximum number of iterations reached, giving up.\n";
+ break;
+ }
+
+ const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState =
+ BlockStates[Block->getBlockID()];
+ TypeErasedDataflowAnalysisState NewBlockState =
+ transferBlock(BlockStates, *Block, InitEnv, Analysis);
+
+ if (OldBlockState.hasValue() &&
+ Analysis.isEqualTypeErased(OldBlockState.getValue().Lattice,
+ NewBlockState.Lattice) &&
+ OldBlockState->Env == NewBlockState.Env) {
+ // The state of `Block` didn't change after transfer so there's no need to
+ // revisit its successors.
+ continue;
+ }
+
+ BlockStates[Block->getBlockID()] = std::move(NewBlockState);
+
+ // Do not add unreachable successor blocks to `Worklist`.
+ if (Block->hasNoReturnElement())
+ continue;
+
+ Worklist.enqueueSuccessors(Block);
+ }
+ // FIXME: Consider evaluating unreachable basic blocks (those that have a
+ // state set to `llvm::None` at this point) to also analyze dead code.
+
+ return BlockStates;
}
+
+} // namespace dataflow
+} // namespace clang
diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt
index 00026874417bb..7e2a00b96057a 100644
--- a/clang/unittests/Analysis/CMakeLists.txt
+++ b/clang/unittests/Analysis/CMakeLists.txt
@@ -28,3 +28,5 @@ target_link_libraries(ClangAnalysisTests
PRIVATE
LLVMTestingSupport
)
+
+add_subdirectory(FlowSensitive)
diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
new file mode 100644
index 0000000000000..d463e31bea3bd
--- /dev/null
+++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
@@ -0,0 +1,20 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ )
+
+add_clang_unittest(ClangAnalysisFlowSensitiveTests
+ TypeErasedDataflowAnalysisTest.cpp
+ )
+
+clang_target_link_libraries(ClangAnalysisFlowSensitiveTests
+ PRIVATE
+ clangAnalysis
+ clangAnalysisFlowSensitive
+ clangAST
+ clangASTMatchers
+ clangBasic
+ clangFrontend
+ clangTesting
+ clangTooling
+ )
+
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
new file mode 100644
index 0000000000000..61765eb1404a2
--- /dev/null
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -0,0 +1,148 @@
+//===- unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/Decl.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/StringRef.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <cassert>
+#include <memory>
+#include <vector>
+
+using namespace clang;
+using namespace dataflow;
+
+template <typename AnalysisT>
+class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback {
+public:
+ void run(const ast_matchers::MatchFinder::MatchResult &Result) override {
+ assert(BlockStates.empty());
+
+ const auto *Func = Result.Nodes.getNodeAs<FunctionDecl>("func");
+ assert(Func != nullptr);
+
+ Stmt *Body = Func->getBody();
+ assert(Body != nullptr);
+
+ // FIXME: Consider providing a utility that returns a `CFG::BuildOptions`
+ // which is a good default for most clients or a utility that directly
+ // builds the `CFG` using default `CFG::BuildOptions`.
+ CFG::BuildOptions Options;
+ Options.AddImplicitDtors = true;
+ Options.AddTemporaryDtors = true;
+ Options.setAllAlwaysAdd();
+
+ std::unique_ptr<CFG> Cfg =
+ CFG::buildCFG(nullptr, Body, Result.Context, Options);
+ assert(Cfg != nullptr);
+
+ AnalysisT Analysis(*Result.Context);
+ Environment Env;
+ BlockStates = runDataflowAnalysis(*Cfg, Analysis, Env);
+ }
+
+ std::vector<
+ llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
+ BlockStates;
+};
+
+template <typename AnalysisT>
+std::vector<llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
+runAnalysis(llvm::StringRef Code) {
+ std::unique_ptr<ASTUnit> AST =
+ tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
+
+ AnalysisCallback<AnalysisT> Callback;
+ ast_matchers::MatchFinder Finder;
+ Finder.addMatcher(
+ ast_matchers::functionDecl(ast_matchers::hasName("target")).bind("func"),
+ &Callback);
+ Finder.matchAST(AST->getASTContext());
+
+ return Callback.BlockStates;
+}
+
+class NoopLattice {
+public:
+ bool operator==(const NoopLattice &) const { return true; }
+
+ LatticeJoinEffect join(const NoopLattice &) {
+ return LatticeJoinEffect::Unchanged;
+ }
+};
+
+class NoopAnalysis : public DataflowAnalysis<NoopAnalysis, NoopLattice> {
+public:
+ NoopAnalysis(ASTContext &Context)
+ : DataflowAnalysis<NoopAnalysis, NoopLattice>(Context) {}
+
+ static NoopLattice initialElement() { return {}; }
+
+ NoopLattice transfer(const Stmt *S, const NoopLattice &E, Environment &Env) {
+ return {};
+ }
+};
+
+TEST(DataflowAnalysisTest, NoopAnalysis) {
+ auto BlockStates = runAnalysis<NoopAnalysis>(R"(
+ void target() {}
+ )");
+ EXPECT_EQ(BlockStates.size(), 2u);
+ EXPECT_TRUE(BlockStates[0].hasValue());
+ EXPECT_TRUE(BlockStates[1].hasValue());
+}
+
+struct NonConvergingLattice {
+ int State;
+
+ bool operator==(const NonConvergingLattice &Other) const {
+ return State == Other.State;
+ }
+
+ LatticeJoinEffect join(const NonConvergingLattice &Other) {
+ if (Other.State == 0)
+ return LatticeJoinEffect::Unchanged;
+ State += Other.State;
+ return LatticeJoinEffect::Changed;
+ }
+};
+
+class NonConvergingAnalysis
+ : public DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice> {
+public:
+ explicit NonConvergingAnalysis(ASTContext &Context)
+ : DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice>(Context) {
+ }
+
+ static NonConvergingLattice initialElement() { return {0}; }
+
+ NonConvergingLattice transfer(const Stmt *S, const NonConvergingLattice &E,
+ Environment &Env) {
+ return {E.State + 1};
+ }
+};
+
+TEST(DataflowAnalysisTest, NonConvergingAnalysis) {
+ auto BlockStates = runAnalysis<NonConvergingAnalysis>(R"(
+ void target() {
+ while(true) {}
+ }
+ )");
+ EXPECT_EQ(BlockStates.size(), 4u);
+ EXPECT_FALSE(BlockStates[0].hasValue());
+ EXPECT_TRUE(BlockStates[1].hasValue());
+ EXPECT_TRUE(BlockStates[2].hasValue());
+ EXPECT_TRUE(BlockStates[3].hasValue());
+}
More information about the cfe-commits
mailing list