[clang] [clang][dataflow] Make cap on block visits configurable by caller. (PR #77481)

Yitzhak Mandelbaum via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 9 06:56:44 PST 2024


https://github.com/ymand created https://github.com/llvm/llvm-project/pull/77481

Previously, we hard-coded the cap on block visits inside the framework. This
patch enables the caller to specify the cap in the APIs for running an analysis.


>From 34121d09cbb833c6462d516767f14b2cd05db82d Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Thu, 4 Jan 2024 15:36:40 +0000
Subject: [PATCH] [clang][dataflow] Make cap on block visits configurable by
 caller.

Previously, we hard-coded the cap on block visits inside the framework. This
patch enables the caller to specify the cap in the APIs for running an analysis.
---
 .../Analysis/FlowSensitive/DataflowAnalysis.h | 27 +++++++++++++++---
 .../TypeErasedDataflowAnalysis.h              |  9 +++++-
 .../TypeErasedDataflowAnalysis.cpp            | 28 ++++++++-----------
 3 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
index 1dffbe8a09c360..6426fc1a3aff14 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
@@ -186,6 +186,14 @@ template <typename LatticeT> struct DataflowAnalysisState {
 /// the dataflow analysis cannot be performed successfully. Otherwise, calls
 /// `PostVisitCFG` on each CFG element with the final analysis results at that
 /// program point.
+///
+/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
+/// distinguish between repeat visits to the same block and visits to distinct
+/// blocks. This parameter is a backstop to prevent infintite loops, in the case
+/// of bugs in the lattice and/or transfer functions that prevent the analysis
+/// from converging. The default value is essentially arbitrary -- large enough
+/// to accomodate what seems like any reasonable CFG, but still small enough to
+/// limit the cost of hitting the limit.
 template <typename AnalysisT>
 llvm::Expected<std::vector<
     std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
@@ -194,7 +202,8 @@ runDataflowAnalysis(
     const Environment &InitEnv,
     std::function<void(const CFGElement &, const DataflowAnalysisState<
                                                typename AnalysisT::Lattice> &)>
-        PostVisitCFG = nullptr) {
+        PostVisitCFG = nullptr,
+    std::int32_t MaxBlockVisits = 20'000) {
   std::function<void(const CFGElement &,
                      const TypeErasedDataflowAnalysisState &)>
       PostVisitCFGClosure = nullptr;
@@ -212,7 +221,7 @@ runDataflowAnalysis(
   }
 
   auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis(
-      CFCtx, Analysis, InitEnv, PostVisitCFGClosure);
+      CFCtx, Analysis, InitEnv, PostVisitCFGClosure, MaxBlockVisits);
   if (!TypeErasedBlockStates)
     return TypeErasedBlockStates.takeError();
 
@@ -261,6 +270,14 @@ auto createAnalysis(ASTContext &ASTCtx, Environment &Env)
 ///   iterations.
 /// - This limit is still low enough to keep runtimes acceptable (on typical
 ///   machines) in cases where we hit the limit.
+///
+/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
+/// distinguish between repeat visits to the same block and visits to distinct
+/// blocks. This parameter is a backstop to prevent infintite loops, in the case
+/// of bugs in the lattice and/or transfer functions that prevent the analysis
+/// from converging. The default value is essentially arbitrary -- large enough
+/// to accomodate what seems like any reasonable CFG, but still small enough to
+/// limit the cost of hitting the limit.
 template <typename AnalysisT, typename Diagnostic>
 llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
     const FunctionDecl &FuncDecl, ASTContext &ASTCtx,
@@ -268,7 +285,8 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
         const CFGElement &, ASTContext &,
         const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>
         Diagnoser,
-    std::int64_t MaxSATIterations = 1'000'000'000) {
+    std::int64_t MaxSATIterations = 1'000'000'000,
+    std::int32_t MaxBlockVisits = 20'000) {
   llvm::Expected<ControlFlowContext> Context =
       ControlFlowContext::build(FuncDecl);
   if (!Context)
@@ -293,7 +311,8 @@ llvm::Expected<llvm::SmallVector<Diagnostic>> diagnoseFunction(
                             State.Lattice.Value),
                         State.Env));
                 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics));
-              })
+              },
+              MaxBlockVisits)
               .takeError())
     return std::move(Err);
 
diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
index 67c323dbf45e1b..edc582ac938fa2 100644
--- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -138,13 +138,20 @@ struct TypeErasedDataflowAnalysisState {
 /// dataflow analysis cannot be performed successfully. Otherwise, calls
 /// `PostVisitCFG` on each CFG element with the final analysis results at that
 /// program point.
+///
+/// `MaxBlockVisits` caps the number of block visits during analysis. It doesn't
+/// distinguish between repeat visits to the same block and visits to distinct
+/// blocks. This parameter is a backstop to prevent infintite loops, in the case
+/// of bugs in the lattice and/or transfer functions that prevent the analysis
+/// from converging.
 llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
 runTypeErasedDataflowAnalysis(
     const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis,
     const Environment &InitEnv,
     std::function<void(const CFGElement &,
                        const TypeErasedDataflowAnalysisState &)>
-        PostVisitCFG = nullptr);
+        PostVisitCFG,
+    std::int32_t MaxBlockVisits);
 
 } // namespace dataflow
 } // namespace clang
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index faf83a8920d4ea..f1899590f41253 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -498,7 +498,8 @@ runTypeErasedDataflowAnalysis(
     const Environment &InitEnv,
     std::function<void(const CFGElement &,
                        const TypeErasedDataflowAnalysisState &)>
-        PostVisitCFG) {
+        PostVisitCFG,
+    std::int32_t MaxBlockVisits) {
   PrettyStackTraceAnalysis CrashInfo(CFCtx, "runTypeErasedDataflowAnalysis");
 
   std::optional<Environment> MaybeStartingEnv;
@@ -524,27 +525,20 @@ runTypeErasedDataflowAnalysis(
 
   AnalysisContext AC(CFCtx, Analysis, StartingEnv, BlockStates);
 
-  // Bugs in lattices and transfer functions can prevent the analysis from
-  // converging. To limit the damage (infinite loops) that these bugs can cause,
-  // limit the number of iterations.
-  // FIXME: Consider making the maximum number of iterations configurable.
-  // FIXME: Consider restricting the number of backedges followed, rather than
-  // iterations.
-  // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
-  // of iterations, number of functions that time out, etc.
-  static constexpr uint32_t MaxAverageVisitsPerBlock = 4;
-  static constexpr uint32_t AbsoluteMaxIterations = 1 << 16;
-  const uint32_t RelativeMaxIterations =
+  // FIXME: remove relative cap. There isn't really any good setting for
+  // `MaxAverageVisitsPerBlock`, so it has no clear value over using
+  // `MaxBlockVisits` directly.
+  static constexpr std::int32_t MaxAverageVisitsPerBlock = 4;
+  const std::int32_t RelativeMaxBlockVisits =
       MaxAverageVisitsPerBlock * BlockStates.size();
-  const uint32_t MaxIterations =
-      std::min(RelativeMaxIterations, AbsoluteMaxIterations);
-  uint32_t Iterations = 0;
+  MaxBlockVisits = std::min(RelativeMaxBlockVisits, MaxBlockVisits);
+  std::int32_t BlockVisits = 0;
   while (const CFGBlock *Block = Worklist.dequeue()) {
     LLVM_DEBUG(llvm::dbgs()
                << "Processing Block " << Block->getBlockID() << "\n");
-    if (++Iterations > MaxIterations) {
+    if (++BlockVisits > MaxBlockVisits) {
       return llvm::createStringError(std::errc::timed_out,
-                                     "maximum number of iterations reached");
+                                     "maximum number of blocks processed");
     }
 
     const std::optional<TypeErasedDataflowAnalysisState> &OldBlockState =



More information about the cfe-commits mailing list