[clang] [analyzer] Avoid out-of-order node traversal on void return (PR #117863)

Wed Nov 27 02:27:04 PST 2024

https://github.com/necto updated https://github.com/llvm/llvm-project/pull/117863

>From c725ed73555a3b9080541fe7d4a71ceef0b04762 Mon Sep 17 00:00:00 2001
From: Arseniy Zaostrovnykh <necto.ne at gmail.com>
Date: Mon, 25 Nov 2024 16:06:53 +0100
Subject: [PATCH 1/2] [analyzer] Avoid out-of-order node traversal on void
 return

The motivating example:

```C++
void inf_loop_break_callee() {
  void* data = malloc(10);
  while (1) {
    (void)data; // line 4
    break; // -> execution continues on line 4 ?!!
  }
}
void inf_loop_break_caller() {
  inf_loop_break_callee();
}
```

To correct the flow steps in this example (see the fixed version in the
added test case) I changed two things in the engine:
 - Make `processCallExit` create a new StmtPoint only for return
   statements. If the last non-jump statement is not a return statement,
   e.g. `(void)data;`, it is no longer inserted in the exploded graph after
   the function exit.
 - Optionally skip the purge program points. In the example above, purge
   points are still inserted after the `break;` executes. Now, when the bug
   reporter is looking for the next statement executed after the function
   execution is finished, it will ignore the purge program points, so it
   won't confusingly pick the `(void)data;` statement.
---
 .../Core/PathSensitive/ExplodedGraph.h        |  4 +-
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 40 +++++++++++++------
 .../lib/StaticAnalyzer/Core/ExplodedGraph.cpp |  5 ++-
 .../Core/ExprEngineCallAndReturn.cpp          |  9 ++++-
 ...-uninitialized-object-unguarded-access.cpp |  8 ++--
 .../test/Analysis/void-call-exit-modelling.c  | 17 ++++++++
 6 files changed, 63 insertions(+), 20 deletions(-)
 create mode 100644 clang/test/Analysis/void-call-exit-modelling.c

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
index 2fb05ac46e8fad..d0a8aa434bb576 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
@@ -278,7 +278,9 @@ class ExplodedNode : public llvm::FoldingSetNode {
   /// Useful for explaining control flow that follows the current node.
   /// If the statement belongs to a body-farmed definition, retrieve the
   /// call site for that definition.
-  const Stmt *getNextStmtForDiagnostics() const;
+  /// If skipPurge is true, skip the purge-dead-symbols nodes (that are often
+  /// inserted out-of-order by the endinge).
+  const Stmt *getNextStmtForDiagnostics(bool skipPurge) const;
 
   /// Find the statement that was executed immediately before this node.
   /// Useful when the node corresponds to a CFG block entrance.
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index b67e6cd86c3d60..1f531ffc292511 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -589,7 +589,8 @@ static void removePiecesWithInvalidLocations(PathPieces &Pieces) {
 
 PathDiagnosticLocation PathDiagnosticBuilder::ExecutionContinues(
     const PathDiagnosticConstruct &C) const {
-  if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics())
+  if (const Stmt *S =
+          C.getCurrentNode()->getNextStmtForDiagnostics(/*skipPurge=*/true))
     return PathDiagnosticLocation(S, getSourceManager(),
                                   C.getCurrLocationContext());
 
@@ -882,7 +883,8 @@ void PathDiagnosticBuilder::generateMinimalDiagForBlockEdge(
 
   case Stmt::GotoStmtClass:
   case Stmt::IndirectGotoStmtClass: {
-    if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics())
+    if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics(
+            /*skipPurge=*/false))
       C.getActivePath().push_front(generateDiagForGotoOP(C, S, Start));
     break;
   }
@@ -2416,6 +2418,28 @@ PathSensitiveBugReport::getRanges() const {
   return Ranges;
 }
 
+static bool exitingDestructor(const ExplodedNode *N) {
+  // Need to loop here, as some times the Error node is already outside of the
+  // destructor context, and the previous node is an edge that is also outside.
+  while (N && !N->getLocation().getAs<StmtPoint>()) {
+    N = N->getFirstPred();
+  }
+  return N && isa<CXXDestructorDecl>(N->getLocationContext()->getDecl());
+}
+
+static const Stmt *
+findReasonableStmtCloseToFunctionExit(const ExplodedNode *N) {
+  if (exitingDestructor(N)) {
+    // If we are exiting a destructor call, it is more useful to point to
+    // the next stmt which is usually the temporary declaration.
+    if (const Stmt *S = N->getNextStmtForDiagnostics(/*skipPurge=*/false))
+      return S;
+    // If next stmt is not found, it is likely the end of a top-level
+    // function analysis. find the last execution statement then.
+  }
+  return N->getPreviousStmtForDiagnostics();
+}
+
 PathDiagnosticLocation
 PathSensitiveBugReport::getLocation() const {
   assert(ErrorNode && "Cannot create a location with a null node.");
@@ -2433,18 +2457,10 @@ PathSensitiveBugReport::getLocation() const {
       if (const ReturnStmt *RS = FE->getStmt())
         return PathDiagnosticLocation::createBegin(RS, SM, LC);
 
-      // If we are exiting a destructor call, it is more useful to point to the
-      // next stmt which is usually the temporary declaration.
-      // For non-destructor and non-top-level calls, the next stmt will still
-      // refer to the last executed stmt of the body.
-      S = ErrorNode->getNextStmtForDiagnostics();
-      // If next stmt is not found, it is likely the end of a top-level function
-      // analysis. find the last execution statement then.
-      if (!S)
-        S = ErrorNode->getPreviousStmtForDiagnostics();
+      S = findReasonableStmtCloseToFunctionExit(ErrorNode);
     }
     if (!S)
-      S = ErrorNode->getNextStmtForDiagnostics();
+      S = ErrorNode->getNextStmtForDiagnostics(/*skipPurge=*/false);
   }
 
   if (S) {
diff --git a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
index 11cef00ada330b..81dc45b6923524 100644
--- a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
@@ -347,8 +347,11 @@ const Stmt *ExplodedNode::getStmtForDiagnostics() const {
   return nullptr;
 }
 
-const Stmt *ExplodedNode::getNextStmtForDiagnostics() const {
+const Stmt *ExplodedNode::getNextStmtForDiagnostics(bool skipPurge) const {
   for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
+    if (skipPurge && N->getLocation().isPurgeKind()) {
+      continue;
+    }
     if (const Stmt *S = N->getStmtForDiagnostics()) {
       // Check if the statement is '?' or '&&'/'||'.  These are "merges",
       // not actual statement points.
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
index 2ca24d0c5ab22b..02facf786830d2 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
@@ -353,14 +353,19 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) {
   ExplodedNodeSet CleanedNodes;
   if (LastSt && Blk && AMgr.options.AnalysisPurgeOpt != PurgeNone) {
     static SimpleProgramPointTag retValBind("ExprEngine", "Bind Return Value");
-    PostStmt Loc(LastSt, calleeCtx, &retValBind);
+    auto Loc = isa<ReturnStmt>(LastSt)
+                   ? ProgramPoint{PostStmt(LastSt, calleeCtx, &retValBind)}
+                   : ProgramPoint{EpsilonPoint(calleeCtx, /*Data1=*/nullptr,
+                                               /*Data2=*/nullptr, &retValBind)};
+    const CFGBlock *PrePurgeBlock =
+        isa<ReturnStmt>(LastSt) ? Blk : &CEBNode->getCFG().getExit();
     bool isNew;
     ExplodedNode *BindedRetNode = G.getNode(Loc, state, false, &isNew);
     BindedRetNode->addPredecessor(CEBNode, G);
     if (!isNew)
       return;
 
-    NodeBuilderContext Ctx(getCoreEngine(), Blk, BindedRetNode);
+    NodeBuilderContext Ctx(getCoreEngine(), PrePurgeBlock, BindedRetNode);
     currBldrCtx = &Ctx;
     // Here, we call the Symbol Reaper with 0 statement and callee location
     // context, telling it to clean up everything in the callee's context
diff --git a/clang/test/Analysis/cxx-uninitialized-object-unguarded-access.cpp b/clang/test/Analysis/cxx-uninitialized-object-unguarded-access.cpp
index 53e72e7c5f4f7c..611e1d8255976c 100644
--- a/clang/test/Analysis/cxx-uninitialized-object-unguarded-access.cpp
+++ b/clang/test/Analysis/cxx-uninitialized-object-unguarded-access.cpp
@@ -163,8 +163,8 @@ class UnguardedFieldThroughMethodTest {
       Volume = 0;
       break;
     case A:
-      Area = 0; // expected-warning {{1 uninitialized field}}
-      break;
+      Area = 0;
+      break; // expected-warning {{1 uninitialized field}}
     }
   }
 
@@ -201,8 +201,8 @@ class UnguardedPublicFieldsTest {
       Volume = 0;
       break;
     case A:
-      Area = 0; // expected-warning {{1 uninitialized field}}
-      break;
+      Area = 0;
+      break; // expected-warning {{1 uninitialized field}}
     }
   }
 
diff --git a/clang/test/Analysis/void-call-exit-modelling.c b/clang/test/Analysis/void-call-exit-modelling.c
new file mode 100644
index 00000000000000..961aac55082e31
--- /dev/null
+++ b/clang/test/Analysis/void-call-exit-modelling.c
@@ -0,0 +1,17 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -analyzer-output text -verify %s
+
+typedef __typeof(sizeof(int)) size_t;
+void *malloc(size_t size);
+
+void inf_loop_break_callee() {
+  void* data = malloc(10); // expected-note{{Memory is allocated}}
+  while (1) { // expected-note{{Loop condition is true}}
+    (void)data;
+    break; // No note that we jump to the line above from this break
+  } // expected-note at -1{{Execution jumps to the end of the function}}
+} // expected-warning{{Potential leak of memory pointed to by 'data'}}
+// expected-note at -1  {{Potential leak of memory pointed to by 'data'}}
+
+void inf_loop_break_caller() {
+  inf_loop_break_callee(); // expected-note{{Calling 'inf_loop_break_callee'}}
+}

>From eb1b47c55bb44fcaa4fb2f64c483905cde6cc846 Mon Sep 17 00:00:00 2001
From: Arseniy Zaostrovnykh <necto.ne at gmail.com>
Date: Wed, 27 Nov 2024 11:26:47 +0100
Subject: [PATCH 2/2] =?UTF-8?q?Test=20top-level=20function=20effect.=20Tha?=
 =?UTF-8?q?nks,=20Bal=C3=A1zs=20for=20this=20example?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 clang/test/Analysis/void-call-exit-modelling.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang/test/Analysis/void-call-exit-modelling.c b/clang/test/Analysis/void-call-exit-modelling.c
index 961aac55082e31..6fc8cb54f00bcd 100644
--- a/clang/test/Analysis/void-call-exit-modelling.c
+++ b/clang/test/Analysis/void-call-exit-modelling.c
@@ -15,3 +15,12 @@ void inf_loop_break_callee() {
 void inf_loop_break_caller() {
   inf_loop_break_callee(); // expected-note{{Calling 'inf_loop_break_callee'}}
 }
+
+void inf_loop_break_top() {
+  void* data = malloc(10); // expected-note{{Memory is allocated}}
+  while (1) { // expected-note{{Loop condition is true}}
+    (void)data;
+    break; // No note that we jump to the line above from this break
+  } // expected-note at -1{{Execution jumps to the end of the function}}
+} // expected-warning{{Potential leak of memory pointed to by 'data'}}
+// expected-note at -1  {{Potential leak of memory pointed to by 'data'}}