[clang] [analyzer] Avoid out-of-order node traversal on void return (PR #117863)

Arseniy Zaostrovnykh via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 27 04:41:31 PST 2024


https://github.com/necto updated https://github.com/llvm/llvm-project/pull/117863

>From c725ed73555a3b9080541fe7d4a71ceef0b04762 Mon Sep 17 00:00:00 2001
From: Arseniy Zaostrovnykh <necto.ne at gmail.com>
Date: Mon, 25 Nov 2024 16:06:53 +0100
Subject: [PATCH 1/5] [analyzer] Avoid out-of-order node traversal on void
 return

The motivating example:

```C++
void inf_loop_break_callee() {
  void* data = malloc(10);
  while (1) {
    (void)data; // line 4
    break; // -> execution continues on line 4 ?!!
  }
}
void inf_loop_break_caller() {
  inf_loop_break_callee();
}
```

To correct the flow steps in this example (see the fixed version in the
added test case) I changed two things in the engine:
 - Make `processCallExit` create a new StmtPoint only for return
   statements. If the last non-jump statement is not a return statement,
   e.g. `(void)data;`, it is no longer inserted in the exploded graph after
   the function exit.
 - Optionally skip the purge program points. In the example above, purge
   points are still inserted after the `break;` executes. Now, when the bug
   reporter is looking for the next statement executed after the function
   execution is finished, it will ignore the purge program points, so it
   won't confusingly pick the `(void)data;` statement.
---
 .../Core/PathSensitive/ExplodedGraph.h        |  4 +-
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 40 +++++++++++++------
 .../lib/StaticAnalyzer/Core/ExplodedGraph.cpp |  5 ++-
 .../Core/ExprEngineCallAndReturn.cpp          |  9 ++++-
 ...-uninitialized-object-unguarded-access.cpp |  8 ++--
 .../test/Analysis/void-call-exit-modelling.c  | 17 ++++++++
 6 files changed, 63 insertions(+), 20 deletions(-)
 create mode 100644 clang/test/Analysis/void-call-exit-modelling.c

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
index 2fb05ac46e8fad..d0a8aa434bb576 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
@@ -278,7 +278,9 @@ class ExplodedNode : public llvm::FoldingSetNode {
   /// Useful for explaining control flow that follows the current node.
   /// If the statement belongs to a body-farmed definition, retrieve the
   /// call site for that definition.
-  const Stmt *getNextStmtForDiagnostics() const;
+  /// If skipPurge is true, skip the purge-dead-symbols nodes (that are often
+  /// inserted out-of-order by the endinge).
+  const Stmt *getNextStmtForDiagnostics(bool skipPurge) const;
 
   /// Find the statement that was executed immediately before this node.
   /// Useful when the node corresponds to a CFG block entrance.
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index b67e6cd86c3d60..1f531ffc292511 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -589,7 +589,8 @@ static void removePiecesWithInvalidLocations(PathPieces &Pieces) {
 
 PathDiagnosticLocation PathDiagnosticBuilder::ExecutionContinues(
     const PathDiagnosticConstruct &C) const {
-  if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics())
+  if (const Stmt *S =
+          C.getCurrentNode()->getNextStmtForDiagnostics(/*skipPurge=*/true))
     return PathDiagnosticLocation(S, getSourceManager(),
                                   C.getCurrLocationContext());
 
@@ -882,7 +883,8 @@ void PathDiagnosticBuilder::generateMinimalDiagForBlockEdge(
 
   case Stmt::GotoStmtClass:
   case Stmt::IndirectGotoStmtClass: {
-    if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics())
+    if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics(
+            /*skipPurge=*/false))
       C.getActivePath().push_front(generateDiagForGotoOP(C, S, Start));
     break;
   }
@@ -2416,6 +2418,28 @@ PathSensitiveBugReport::getRanges() const {
   return Ranges;
 }
 
+static bool exitingDestructor(const ExplodedNode *N) {
+  // Need to loop here, as some times the Error node is already outside of the
+  // destructor context, and the previous node is an edge that is also outside.
+  while (N && !N->getLocation().getAs<StmtPoint>()) {
+    N = N->getFirstPred();
+  }
+  return N && isa<CXXDestructorDecl>(N->getLocationContext()->getDecl());
+}
+
+static const Stmt *
+findReasonableStmtCloseToFunctionExit(const ExplodedNode *N) {
+  if (exitingDestructor(N)) {
+    // If we are exiting a destructor call, it is more useful to point to
+    // the next stmt which is usually the temporary declaration.
+    if (const Stmt *S = N->getNextStmtForDiagnostics(/*skipPurge=*/false))
+      return S;
+    // If next stmt is not found, it is likely the end of a top-level
+    // function analysis. find the last execution statement then.
+  }
+  return N->getPreviousStmtForDiagnostics();
+}
+
 PathDiagnosticLocation
 PathSensitiveBugReport::getLocation() const {
   assert(ErrorNode && "Cannot create a location with a null node.");
@@ -2433,18 +2457,10 @@ PathSensitiveBugReport::getLocation() const {
       if (const ReturnStmt *RS = FE->getStmt())
         return PathDiagnosticLocation::createBegin(RS, SM, LC);
 
-      // If we are exiting a destructor call, it is more useful to point to the
-      // next stmt which is usually the temporary declaration.
-      // For non-destructor and non-top-level calls, the next stmt will still
-      // refer to the last executed stmt of the body.
-      S = ErrorNode->getNextStmtForDiagnostics();
-      // If next stmt is not found, it is likely the end of a top-level function
-      // analysis. find the last execution statement then.
-      if (!S)
-        S = ErrorNode->getPreviousStmtForDiagnostics();
+      S = findReasonableStmtCloseToFunctionExit(ErrorNode);
     }
     if (!S)
-      S = ErrorNode->getNextStmtForDiagnostics();
+      S = ErrorNode->getNextStmtForDiagnostics(/*skipPurge=*/false);
   }
 
   if (S) {
diff --git a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
index 11cef00ada330b..81dc45b6923524 100644
--- a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
@@ -347,8 +347,11 @@ const Stmt *ExplodedNode::getStmtForDiagnostics() const {
   return nullptr;
 }
 
-const Stmt *ExplodedNode::getNextStmtForDiagnostics() const {
+const Stmt *ExplodedNode::getNextStmtForDiagnostics(bool skipPurge) const {
   for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
+    if (skipPurge && N->getLocation().isPurgeKind()) {
+      continue;
+    }
     if (const Stmt *S = N->getStmtForDiagnostics()) {
       // Check if the statement is '?' or '&&'/'||'.  These are "merges",
       // not actual statement points.
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
index 2ca24d0c5ab22b..02facf786830d2 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
@@ -353,14 +353,19 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) {
   ExplodedNodeSet CleanedNodes;
   if (LastSt && Blk && AMgr.options.AnalysisPurgeOpt != PurgeNone) {
     static SimpleProgramPointTag retValBind("ExprEngine", "Bind Return Value");
-    PostStmt Loc(LastSt, calleeCtx, &retValBind);
+    auto Loc = isa<ReturnStmt>(LastSt)
+                   ? ProgramPoint{PostStmt(LastSt, calleeCtx, &retValBind)}
+                   : ProgramPoint{EpsilonPoint(calleeCtx, /*Data1=*/nullptr,
+                                               /*Data2=*/nullptr, &retValBind)};
+    const CFGBlock *PrePurgeBlock =
+        isa<ReturnStmt>(LastSt) ? Blk : &CEBNode->getCFG().getExit();
     bool isNew;
     ExplodedNode *BindedRetNode = G.getNode(Loc, state, false, &isNew);
     BindedRetNode->addPredecessor(CEBNode, G);
     if (!isNew)
       return;
 
-    NodeBuilderContext Ctx(getCoreEngine(), Blk, BindedRetNode);
+    NodeBuilderContext Ctx(getCoreEngine(), PrePurgeBlock, BindedRetNode);
     currBldrCtx = &Ctx;
     // Here, we call the Symbol Reaper with 0 statement and callee location
     // context, telling it to clean up everything in the callee's context
diff --git a/clang/test/Analysis/cxx-uninitialized-object-unguarded-access.cpp b/clang/test/Analysis/cxx-uninitialized-object-unguarded-access.cpp
index 53e72e7c5f4f7c..611e1d8255976c 100644
--- a/clang/test/Analysis/cxx-uninitialized-object-unguarded-access.cpp
+++ b/clang/test/Analysis/cxx-uninitialized-object-unguarded-access.cpp
@@ -163,8 +163,8 @@ class UnguardedFieldThroughMethodTest {
       Volume = 0;
       break;
     case A:
-      Area = 0; // expected-warning {{1 uninitialized field}}
-      break;
+      Area = 0;
+      break; // expected-warning {{1 uninitialized field}}
     }
   }
 
@@ -201,8 +201,8 @@ class UnguardedPublicFieldsTest {
       Volume = 0;
       break;
     case A:
-      Area = 0; // expected-warning {{1 uninitialized field}}
-      break;
+      Area = 0;
+      break; // expected-warning {{1 uninitialized field}}
     }
   }
 
diff --git a/clang/test/Analysis/void-call-exit-modelling.c b/clang/test/Analysis/void-call-exit-modelling.c
new file mode 100644
index 00000000000000..961aac55082e31
--- /dev/null
+++ b/clang/test/Analysis/void-call-exit-modelling.c
@@ -0,0 +1,17 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Malloc -analyzer-output text -verify %s
+
+typedef __typeof(sizeof(int)) size_t;
+void *malloc(size_t size);
+
+void inf_loop_break_callee() {
+  void* data = malloc(10); // expected-note{{Memory is allocated}}
+  while (1) { // expected-note{{Loop condition is true}}
+    (void)data;
+    break; // No note that we jump to the line above from this break
+  } // expected-note at -1{{Execution jumps to the end of the function}}
+} // expected-warning{{Potential leak of memory pointed to by 'data'}}
+// expected-note at -1  {{Potential leak of memory pointed to by 'data'}}
+
+void inf_loop_break_caller() {
+  inf_loop_break_callee(); // expected-note{{Calling 'inf_loop_break_callee'}}
+}

>From eb1b47c55bb44fcaa4fb2f64c483905cde6cc846 Mon Sep 17 00:00:00 2001
From: Arseniy Zaostrovnykh <necto.ne at gmail.com>
Date: Wed, 27 Nov 2024 11:26:47 +0100
Subject: [PATCH 2/5] =?UTF-8?q?Test=20top-level=20function=20effect.=20Tha?=
 =?UTF-8?q?nks,=20Bal=C3=A1zs=20for=20this=20example?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 clang/test/Analysis/void-call-exit-modelling.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang/test/Analysis/void-call-exit-modelling.c b/clang/test/Analysis/void-call-exit-modelling.c
index 961aac55082e31..6fc8cb54f00bcd 100644
--- a/clang/test/Analysis/void-call-exit-modelling.c
+++ b/clang/test/Analysis/void-call-exit-modelling.c
@@ -15,3 +15,12 @@ void inf_loop_break_callee() {
 void inf_loop_break_caller() {
   inf_loop_break_callee(); // expected-note{{Calling 'inf_loop_break_callee'}}
 }
+
+void inf_loop_break_top() {
+  void* data = malloc(10); // expected-note{{Memory is allocated}}
+  while (1) { // expected-note{{Loop condition is true}}
+    (void)data;
+    break; // No note that we jump to the line above from this break
+  } // expected-note at -1{{Execution jumps to the end of the function}}
+} // expected-warning{{Potential leak of memory pointed to by 'data'}}
+// expected-note at -1  {{Potential leak of memory pointed to by 'data'}}

>From 4555ae3fb402cc9d01999fd07a6228db876a4776 Mon Sep 17 00:00:00 2001
From: Arseniy Zaostrovnykh <necto.ne at gmail.com>
Date: Wed, 27 Nov 2024 13:04:51 +0100
Subject: [PATCH 3/5] [NFC] Remove curly braces around a single-statement block

---
 clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
index 81dc45b6923524..d228a332922595 100644
--- a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
@@ -349,9 +349,8 @@ const Stmt *ExplodedNode::getStmtForDiagnostics() const {
 
 const Stmt *ExplodedNode::getNextStmtForDiagnostics(bool skipPurge) const {
   for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
-    if (skipPurge && N->getLocation().isPurgeKind()) {
+    if (skipPurge && N->getLocation().isPurgeKind())
       continue;
-    }
     if (const Stmt *S = N->getStmtForDiagnostics()) {
       // Check if the statement is '?' or '&&'/'||'.  These are "merges",
       // not actual statement points.

>From 674befcae4b844e0c63a1ea28c153bc529fc3e39 Mon Sep 17 00:00:00 2001
From: Arseniy Zaostrovnykh <necto.ne at gmail.com>
Date: Wed, 27 Nov 2024 13:39:48 +0100
Subject: [PATCH 4/5] skipPurge unconditionally

---
 .../StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h  |  4 +---
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp          |  9 ++++-----
 clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp        |  4 ++--
 clang/test/Analysis/copy-elision.cpp                   | 10 +++++-----
 4 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
index d0a8aa434bb576..2fb05ac46e8fad 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h
@@ -278,9 +278,7 @@ class ExplodedNode : public llvm::FoldingSetNode {
   /// Useful for explaining control flow that follows the current node.
   /// If the statement belongs to a body-farmed definition, retrieve the
   /// call site for that definition.
-  /// If skipPurge is true, skip the purge-dead-symbols nodes (that are often
-  /// inserted out-of-order by the endinge).
-  const Stmt *getNextStmtForDiagnostics(bool skipPurge) const;
+  const Stmt *getNextStmtForDiagnostics() const;
 
   /// Find the statement that was executed immediately before this node.
   /// Useful when the node corresponds to a CFG block entrance.
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index 1f531ffc292511..725a99ffe02011 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -590,7 +590,7 @@ static void removePiecesWithInvalidLocations(PathPieces &Pieces) {
 PathDiagnosticLocation PathDiagnosticBuilder::ExecutionContinues(
     const PathDiagnosticConstruct &C) const {
   if (const Stmt *S =
-          C.getCurrentNode()->getNextStmtForDiagnostics(/*skipPurge=*/true))
+          C.getCurrentNode()->getNextStmtForDiagnostics())
     return PathDiagnosticLocation(S, getSourceManager(),
                                   C.getCurrLocationContext());
 
@@ -883,8 +883,7 @@ void PathDiagnosticBuilder::generateMinimalDiagForBlockEdge(
 
   case Stmt::GotoStmtClass:
   case Stmt::IndirectGotoStmtClass: {
-    if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics(
-            /*skipPurge=*/false))
+    if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics())
       C.getActivePath().push_front(generateDiagForGotoOP(C, S, Start));
     break;
   }
@@ -2432,7 +2431,7 @@ findReasonableStmtCloseToFunctionExit(const ExplodedNode *N) {
   if (exitingDestructor(N)) {
     // If we are exiting a destructor call, it is more useful to point to
     // the next stmt which is usually the temporary declaration.
-    if (const Stmt *S = N->getNextStmtForDiagnostics(/*skipPurge=*/false))
+    if (const Stmt *S = N->getNextStmtForDiagnostics())
       return S;
     // If next stmt is not found, it is likely the end of a top-level
     // function analysis. find the last execution statement then.
@@ -2460,7 +2459,7 @@ PathSensitiveBugReport::getLocation() const {
       S = findReasonableStmtCloseToFunctionExit(ErrorNode);
     }
     if (!S)
-      S = ErrorNode->getNextStmtForDiagnostics(/*skipPurge=*/false);
+      S = ErrorNode->getNextStmtForDiagnostics();
   }
 
   if (S) {
diff --git a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
index d228a332922595..1e0cc2eea9ed85 100644
--- a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
@@ -347,9 +347,9 @@ const Stmt *ExplodedNode::getStmtForDiagnostics() const {
   return nullptr;
 }
 
-const Stmt *ExplodedNode::getNextStmtForDiagnostics(bool skipPurge) const {
+const Stmt *ExplodedNode::getNextStmtForDiagnostics() const {
   for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
-    if (skipPurge && N->getLocation().isPurgeKind())
+    if (N->getLocation().isPurgeKind())
       continue;
     if (const Stmt *S = N->getStmtForDiagnostics()) {
       // Check if the statement is '?' or '&&'/'||'.  These are "merges",
diff --git a/clang/test/Analysis/copy-elision.cpp b/clang/test/Analysis/copy-elision.cpp
index 423c4519f5bfc6..cd941854fc7f19 100644
--- a/clang/test/Analysis/copy-elision.cpp
+++ b/clang/test/Analysis/copy-elision.cpp
@@ -263,17 +263,17 @@ void testVariable() {
 
 struct TestCtorInitializer {
   ClassWithDestructor c;
-  TestCtorInitializer(AddressVector<ClassWithDestructor> &v)
-    : c(ClassWithDestructor(v)) {}
-  // no-elide-warning at -1 {{Address of stack memory associated with temporary \
-object of type 'ClassWithDestructor' is still referred \
-to by the caller variable 'v' upon returning to the caller}}
+  TestCtorInitializer(AddressVector<ClassWithDestructor> &refParam)
+    : c(ClassWithDestructor(refParam)) {}
 };
 
 void testCtorInitializer() {
   AddressVector<ClassWithDestructor> v;
   {
     TestCtorInitializer t(v);
+    // no-elide-warning at -1 {{Address of stack memory associated with temporary \
+object of type 'ClassWithDestructor' is still referred \
+to by the caller variable 'v' upon returning to the caller}}
     // Check if the last destructor is an automatic destructor.
     // A temporary destructor would have fired by now.
 #if ELIDE

>From 525c6aba9531660a37b777ecd74033d0a0f57730 Mon Sep 17 00:00:00 2001
From: Arseniy Zaostrovnykh <necto.ne at gmail.com>
Date: Wed, 27 Nov 2024 13:41:12 +0100
Subject: [PATCH 5/5] [NFC] fix format

---
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index 725a99ffe02011..2904eab0097dc8 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -589,8 +589,7 @@ static void removePiecesWithInvalidLocations(PathPieces &Pieces) {
 
 PathDiagnosticLocation PathDiagnosticBuilder::ExecutionContinues(
     const PathDiagnosticConstruct &C) const {
-  if (const Stmt *S =
-          C.getCurrentNode()->getNextStmtForDiagnostics())
+  if (const Stmt *S = C.getCurrentNode()->getNextStmtForDiagnostics())
     return PathDiagnosticLocation(S, getSourceManager(),
                                   C.getCurrLocationContext());
 



More information about the cfe-commits mailing list