[clang] [clang][dataflow] Make cap on block visits configurable by caller. (PR #77481)
Yitzhak Mandelbaum via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 22 10:46:46 PST 2024
https://github.com/ymand updated https://github.com/llvm/llvm-project/pull/77481
>From 1e37ced06198e9d8b7c583e01f22e53508182902 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Thu, 4 Jan 2024 15:36:40 +0000
Subject: [PATCH 1/3] [clang][dataflow] Make cap on block visits configurable
by caller.
Previously, we hard-coded the cap on block visits inside the framework. This
patch enables the caller to specify the cap in the APIs for running an analysis.
---
.../Analysis/FlowSensitive/DataflowAnalysis.h | 27 +++++++++++++++---
.../TypeErasedDataflowAnalysis.h | 9 +++++-
.../TypeErasedDataflowAnalysis.cpp | 28 ++++++++-----------
3 files changed, 42 insertions(+), 22 deletions(-)
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
index 1dffbe8a09c3609..6426fc1a3aff148 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
@@ -186,6 +186,14 @@ template <typename LatticeT> struct DataflowAnalysisState {
/// the dataflow analysis cannot be performed successfully. Otherwise, calls
/// `PostVisitCFG` on each CFG element with the final analysis results at that
/// program point.
+///
+/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
+/// distinguish between repeat visits to the same block and visits to distinct
+/// blocks. This parameter is a backstop to prevent infintite loops, in the case
+/// of bugs in the lattice and/or transfer functions that prevent the analysis
+/// from converging. The default value is essentially arbitrary -- large enough
+/// to accomodate what seems like any reasonable CFG, but still small enough to
+/// limit the cost of hitting the limit.
template <typename AnalysisT>
llvm::Expected<std::vector<
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
@@ -194,7 +202,8 @@ runDataflowAnalysis(
const Environment &InitEnv,
std::function<void(const CFGElement &, const DataflowAnalysisState<
typename AnalysisT::Lattice> &)>
- PostVisitCFG = nullptr) {
+ PostVisitCFG = nullptr,
+ std::int32_t MaxBlockVisits = 20'000) {
std::function<void(const CFGElement &,
const TypeErasedDataflowAnalysisState &)>
PostVisitCFGClosure = nullptr;
@@ -212,7 +221,7 @@ runDataflowAnalysis(
}
auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
- CFCtx, Analysis, InitEnv, PostVisitCFGClosure);
+ CFCtx, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits);
if (!TypeErasedBlockStates)
return TypeErasedBlockStates.takeError();
@@ -261,6 +270,14 @@ auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
/// iterations.
/// - This limit is still low enough to keep runtimes acceptable (on typical
/// machines) in cases where we hit the limit.
+///
+/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
+/// distinguish between repeat visits to the same block and visits to distinct
+/// blocks. This parameter is a backstop to prevent infintite loops, in the case
+/// of bugs in the lattice and/or transfer functions that prevent the analysis
+/// from converging. The default value is essentially arbitrary -- large enough
+/// to accomodate what seems like any reasonable CFG, but still small enough to
+/// limit the cost of hitting the limit.
template <typename AnalysisT, typename Diagnostic>
llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
@@ -268,7 +285,8 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
const CFGElement &, ASTContext &,
const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>
Diagnoser,
- std::int64_t MaxSATIterations = 1'000'000'000) {
+ std::int64_t MaxSATIterations = 1'000'000'000,
+ std::int32_t MaxBlockVisits = 20'000) {
llvm::Expected<ControlFlowContext> Context =
ControlFlowContext::build(FuncDecl);
if (!Context)
@@ -293,7 +311,8 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
State.Lattice.Value),
State.Env));
llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
- })
+ },
+ MaxBlockVisits)
.takeError())
return std::move(Err);
diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
index 67c323dbf45e1b2..edc582ac938fa21 100644
--- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -138,13 +138,20 @@ struct TypeErasedDataflowAnalysisState {
/// dataflow analysis cannot be performed successfully. Otherwise, calls
/// `PostVisitCFG` on each CFG element with the final analysis results at that
/// program point.
+///
+/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
+/// distinguish between repeat visits to the same block and visits to distinct
+/// blocks. This parameter is a backstop to prevent infintite loops, in the case
+/// of bugs in the lattice and/or transfer functions that prevent the analysis
+/// from converging.
llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
runTypeErasedDataflowAnalysis(
const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis,
const Environment &InitEnv,
std::function<void(const CFGElement &,
const TypeErasedDataflowAnalysisState &)>
- PostVisitCFG = nullptr);
+ PostVisitCFG,
+ std::int32_t MaxBlockVisits);
} // namespace dataflow
} // namespace clang
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index faf83a8920d4ead..f1899590f412535 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -498,7 +498,8 @@ runTypeErasedDataflowAnalysis(
const Environment &InitEnv,
std::function<void(const CFGElement &,
const TypeErasedDataflowAnalysisState &)>
- PostVisitCFG) {
+ PostVisitCFG,
+ std::int32_t MaxBlockVisits) {
PrettyStackTraceAnalysis CrashInfo(CFCtx, "runTypeErasedDataflowAnalysis");
std::optional<Environment> MaybeStartingEnv;
@@ -524,27 +525,20 @@ runTypeErasedDataflowAnalysis(
AnalysisContext AC(CFCtx, Analysis, StartingEnv, BlockStates);
- // Bugs in lattices and transfer functions can prevent the analysis from
- // converging. To limit the damage (infinite loops) that these bugs can cause,
- // limit the number of iterations.
- // FIXME: Consider making the maximum number of iterations configurable.
- // FIXME: Consider restricting the number of backedges followed, rather than
- // iterations.
- // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
- // of iterations, number of functions that time out, etc.
- static constexpr uint32_t MaxAverageVisitsPerBlock = 4;
- static constexpr uint32_t AbsoluteMaxIterations = 1 << 16;
- const uint32_t RelativeMaxIterations =
+ // FIXME: remove relative cap. There isn't really any good setting for
+ // `MaxAverageVisitsPerBlock`, so it has no clear value over using
+ // `MaxBlockVisits` directly.
+ static constexpr std::int32_t MaxAverageVisitsPerBlock = 4;
+ const std::int32_t RelativeMaxBlockVisits =
MaxAverageVisitsPerBlock * BlockStates.size();
- const uint32_t MaxIterations =
- std::min(RelativeMaxIterations, AbsoluteMaxIterations);
- uint32_t Iterations = 0;
+ MaxBlockVisits = std::min(RelativeMaxBlockVisits, MaxBlockVisits);
+ std::int32_t BlockVisits = 0;
while (const CFGBlock *Block = Worklist.dequeue()) {
LLVM_DEBUG(llvm::dbgs()
<< "Processing Block " << Block->getBlockID() << "\n");
- if (++Iterations > MaxIterations) {
+ if (++BlockVisits > MaxBlockVisits) {
return llvm::createStringError(std::errc::timed_out,
- "maximum number of iterations reached");
+ "maximum number of blocks processed");
}
const std::optional<TypeErasedDataflowAnalysisState> &OldBlockState =
>From 7c5edafe85edd6e2fef5fe933014c0c59a6cae1e Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Wed, 10 Jan 2024 16:27:08 +0000
Subject: [PATCH 2/3] fixup! [clang][dataflow] Make cap on block visits
configurable by caller.
fix build and test failures
---
.../Analysis/FlowSensitive/TestingSupport.cpp | 2 +-
.../Analysis/FlowSensitive/TestingSupport.h | 13 ++++++++++---
.../Analysis/FlowSensitive/TransferTest.cpp | 3 +--
.../TypeErasedDataflowAnalysisTest.cpp | 2 +-
4 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp
index 3726f56fc824d5d..09f5524e152c9fa 100644
--- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp
@@ -4,6 +4,7 @@
#include "clang/AST/Stmt.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
@@ -18,7 +19,6 @@
#include "gtest/gtest.h"
#include <cassert>
#include <functional>
-#include <memory>
#include <string>
#include <system_error>
#include <utility>
diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
index 95ffcbd6f322ec7..0d36d2802897fd1 100644
--- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
+++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
@@ -33,7 +33,7 @@
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/MatchSwitch.h"
-#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
+#include "clang/Analysis/FlowSensitive/NoopLattice.h"
#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
#include "clang/Basic/LLVM.h"
#include "clang/Serialization/PCHContainerOperations.h"
@@ -62,6 +62,11 @@ std::ostream &operator<<(std::ostream &OS,
namespace test {
+// Caps the number of block visits in any individual analysis. Given that test
+// code is typically quite small, we set a low number to help catch any problems
+// early. But, the choice is arbitrary.
+constexpr std::int32_t MaxBlockVisitsInAnalysis = 2'000;
+
/// Returns the environment at the program point marked with `Annotation` from
/// the mapping of annotated program points to analysis state.
///
@@ -277,8 +282,10 @@ checkDataflow(AnalysisInputs<AnalysisT> AI,
// If successful, the dataflow analysis returns a mapping from block IDs to
// the post-analysis states for the CFG blocks that have been evaluated.
llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
- MaybeBlockStates = runTypeErasedDataflowAnalysis(
- CFCtx, Analysis, InitEnv, TypeErasedPostVisitCFG);
+ MaybeBlockStates =
+ runTypeErasedDataflowAnalysis(CFCtx, Analysis, InitEnv,
+ TypeErasedPostVisitCFG,
+ MaxBlockVisitsInAnalysis);
if (!MaybeBlockStates) return MaybeBlockStates.takeError();
AO.BlockStates = *MaybeBlockStates;
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 85ae24f0b6f165a..c777aca78992d17 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -12,14 +12,13 @@
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
#include "clang/Analysis/FlowSensitive/RecordOps.h"
#include "clang/Analysis/FlowSensitive/StorageLocation.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Basic/LangStandard.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index fe5ba5ab5426f76..466d33358fd388b 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -263,7 +263,7 @@ TEST_F(DataflowAnalysisTest, NonConvergingAnalysis) {
auto Res = runAnalysis<NonConvergingAnalysis>(
Code, [](ASTContext &C) { return NonConvergingAnalysis(C); });
EXPECT_EQ(llvm::toString(Res.takeError()),
- "maximum number of iterations reached");
+ "maximum number of blocks processed");
}
// Regression test for joins of bool-typed lvalue expressions. The first loop
>From 83bc3d95b87e22bc0442e16ed9e109b7544d797b Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Fri, 19 Jan 2024 19:54:38 +0000
Subject: [PATCH 3/3] fixup! [clang][dataflow] Make cap on block visits
configurable by caller.
address comments
---
.../Analysis/FlowSensitive/DataflowAnalysis.h | 22 +++++++------------
.../TypeErasedDataflowAnalysis.h | 2 +-
2 files changed, 9 insertions(+), 15 deletions(-)
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
index 6426fc1a3aff148..b95095d2184c0ec 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
@@ -187,13 +187,11 @@ template <typename LatticeT> struct DataflowAnalysisState {
/// `PostVisitCFG` on each CFG element with the final analysis results at that
/// program point.
///
-/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
-/// distinguish between repeat visits to the same block and visits to distinct
-/// blocks. This parameter is a backstop to prevent infintite loops, in the case
-/// of bugs in the lattice and/or transfer functions that prevent the analysis
-/// from converging. The default value is essentially arbitrary -- large enough
-/// to accomodate what seems like any reasonable CFG, but still small enough to
-/// limit the cost of hitting the limit.
+/// `MaxBlockVisits` caps the number of block visits during analysis. See
+/// `runTypeErasedDataflowAnalysis` for a full description. The default value is
+/// essentially arbitrary -- large enough to accommodate what seems like any
+/// reasonable CFG, but still small enough to limit the cost of hitting the
+/// limit.
template <typename AnalysisT>
llvm::Expected<std::vector<
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
@@ -271,13 +269,9 @@ auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
/// - This limit is still low enough to keep runtimes acceptable (on typical
/// machines) in cases where we hit the limit.
///
-/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
-/// distinguish between repeat visits to the same block and visits to distinct
-/// blocks. This parameter is a backstop to prevent infintite loops, in the case
-/// of bugs in the lattice and/or transfer functions that prevent the analysis
-/// from converging. The default value is essentially arbitrary -- large enough
-/// to accomodate what seems like any reasonable CFG, but still small enough to
-/// limit the cost of hitting the limit.
+/// `MaxBlockVisits` caps the number of block visits during analysis. See
+/// `runDataflowAnalysis` for a full description and explanation of the default
+/// value.
template <typename AnalysisT, typename Diagnostic>
llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
index edc582ac938fa21..a0ca7440230b04c 100644
--- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -141,7 +141,7 @@ struct TypeErasedDataflowAnalysisState {
///
/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
/// distinguish between repeat visits to the same block and visits to distinct
-/// blocks. This parameter is a backstop to prevent infintite loops, in the case
+/// blocks. This parameter is a backstop to prevent infinite loops, in the case
/// of bugs in the lattice and/or transfer functions that prevent the analysis
/// from converging.
llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
More information about the cfe-commits
mailing list