[clang] [llvm] [clang-tools-extra] [analyzer] Trust base to derived casts for dynamic types (PR #69057)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 29 06:44:30 PST 2023


https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/69057

>From 8ed4effd114ebd83d4ceaa37655ffd9b7105b28e Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 14 Oct 2023 15:51:42 +0200
Subject: [PATCH 1/5] [analyzer] Trust base to derived casts for dynamic types

When doing a base to derived cast, and we should add a cast info
recording that fact.
This information will be then used for example inside
`CXXInstanceCall::getRuntimeDefinition` (CallEvent.cpp), where for
virtual calls, it will query the associated dynamic type for the `*this` region.

Note that inside `ExprEngine::defaultEvalCall`, if the runtime
definition is found but it might have other definitions, then we will
split the path and have one where we inline the candidate and one other
doing conservative call evaluation. This ensures that if we inlined the
wrong definition, we still have a path where conservative evaluation happened.
In addition to this, remember that once a function was conservatively
evaluated on a path, then further calls to the same function will always
result in conservative evaluation, see `ExprEngine::BifurcateCall` and
the `DynamicDispatchBifurcationMap`.

As a consequence of these rules, we can have execution paths where a
function call might be resolved to different implemetations, depending
on the sequence of events, like here:

```C++
void top(Base *p) {
  p->fun(); // Will split into 2 paths:
  // 1) Inlines the virtual `Base::fun()`.
  // 2) Conservative eval calls, and remembers to always conservatively eval this function.

  // Perform a base to derived cast, and just discard the result.
  (void)static_cast<Derived *>(p);

  p->fun(); // Will split path (1) into 2:
  // 2.1) Inlines the virtual `Derived::fun()`.
  // 2.2) Conservative eval calls, and remembers to always conservatively eval this function.

  // Now, we have exactly 3 paths at this point.
}
```

Fixes #62663
---
 .../Checkers/DynamicTypePropagation.cpp       |  27 +++--
 .../Analysis/cast-trust-base-to-derived.cpp   | 105 ++++++++++++++++++
 .../test/Analysis/osobject-retain-release.cpp |  12 +-
 3 files changed, 129 insertions(+), 15 deletions(-)
 create mode 100644 clang/test/Analysis/cast-trust-base-to-derived.cpp

diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index eda8302595ba4f..6539f3c1e09e32 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -20,6 +20,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/AST/OperationKinds.h"
 #include "clang/AST/ParentMap.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/Basic/Builtins.h"
@@ -392,10 +393,6 @@ void DynamicTypePropagation::checkPostCall(const CallEvent &Call,
   }
 }
 
-/// TODO: Handle explicit casts.
-///       Handle C++ casts.
-///
-/// Precondition: the cast is between ObjCObjectPointers.
 ExplodedNode *DynamicTypePropagation::dynamicTypePropagationOnCasts(
     const CastExpr *CE, ProgramStateRef &State, CheckerContext &C) const {
   // We only track type info for regions.
@@ -403,8 +400,19 @@ ExplodedNode *DynamicTypePropagation::dynamicTypePropagationOnCasts(
   if (!ToR)
     return C.getPredecessor();
 
-  if (isa<ExplicitCastExpr>(CE))
+  if (CE->getCastKind() == CK_BaseToDerived) {
+    bool CastSucceeds = true;
+    QualType FromTy = CE->getSubExpr()->getType();
+    QualType ToTy = CE->getType();
+    ToR = ToR->StripCasts(/*StripBaseAndDerivedCasts=*/true);
+    State = setDynamicTypeAndCastInfo(State, ToR, FromTy, ToTy, CastSucceeds);
+    return C.addTransition(State);
+  }
+
+  // TODO: Handle the rest of explicit casts, inluding the regular C++ casts.
+  if (isa<ExplicitCastExpr>(CE)) {
     return C.getPredecessor();
+  }
 
   if (const Type *NewTy = getBetterObjCType(CE, C)) {
     State = setDynamicTypeInfo(State, ToR, QualType(NewTy, 0));
@@ -609,9 +617,13 @@ storeWhenMoreInformative(ProgramStateRef &State, SymbolRef Sym,
 /// symbol and the destination type of the cast are unrelated, report an error.
 void DynamicTypePropagation::checkPostStmt(const CastExpr *CE,
                                            CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+  ExplodedNode *AfterTypeProp = dynamicTypePropagationOnCasts(CE, State, C);
+
   if (CE->getCastKind() != CK_BitCast)
     return;
 
+  ASTContext &ASTCtxt = C.getASTContext();
   QualType OriginType = CE->getSubExpr()->getType();
   QualType DestType = CE->getType();
 
@@ -621,11 +633,6 @@ void DynamicTypePropagation::checkPostStmt(const CastExpr *CE,
   if (!OrigObjectPtrType || !DestObjectPtrType)
     return;
 
-  ProgramStateRef State = C.getState();
-  ExplodedNode *AfterTypeProp = dynamicTypePropagationOnCasts(CE, State, C);
-
-  ASTContext &ASTCtxt = C.getASTContext();
-
   // This checker detects the subtyping relationships using the assignment
   // rules. In order to be able to do this the kindofness must be stripped
   // first. The checker treats every type as kindof type anyways: when the
diff --git a/clang/test/Analysis/cast-trust-base-to-derived.cpp b/clang/test/Analysis/cast-trust-base-to-derived.cpp
new file mode 100644
index 00000000000000..e717e5617867ca
--- /dev/null
+++ b/clang/test/Analysis/cast-trust-base-to-derived.cpp
@@ -0,0 +1,105 @@
+// RUN: %clang_analyze_cc1 %s -verify \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.taint.TaintPropagation \
+// RUN:   -analyzer-checker=debug.ExprInspection
+
+// See issue https://github.com/llvm/llvm-project/issues/62663
+
+template <typename T> void clang_analyzer_dump(T);
+void clang_analyzer_warnIfReached();
+void clang_analyzer_numTimesReached();
+void clang_analyzer_isTainted(int);
+
+extern int scanf(const char *format, ...);
+
+class ActionHandler {
+public:
+  virtual ~ActionHandler() = default;
+  virtual void onAction(int x, int &) {
+    clang_analyzer_dump(x + 1); // expected-warning {{101}}
+  }
+};
+
+class MyHandler final : public ActionHandler {
+public:
+  void onAction(int x, int &) override {
+    clang_analyzer_dump(x + 2); // expected-warning {{202}}
+  }
+};
+
+void trust_static_types(ActionHandler *p) {
+  // This variable will help to see if conservative call evaluation happened or not.
+  int invalidation_detector;
+
+  // At this point we don't know anything about the dynamic type of `*p`, thus the `onAction` call might be resolved to the default implementation, matching the static type.
+  invalidation_detector = 1000;
+  p->onAction(100, invalidation_detector);
+  clang_analyzer_dump(invalidation_detector);
+  // expected-warning at -1 {{1000}} on this path we trusted the type, and resolved the call to `ActionHandler::onAction(int, int&)`
+  // expected-warning at -2 {{conj}} on this path we conservatively evaluated the previous call
+
+  clang_analyzer_numTimesReached(); // expected-warning {{2}} we only have that 2 paths here
+  // 1) inlined `ActionHandler::onAction(int, int&)`
+  // 2) conservatively eval called
+
+  // Trust that the `*p` refers to an object with `MyHandler` static type (or to some other sub-class).
+  auto *q = static_cast<MyHandler *>(p);
+  (void)q; // Discard the result of the cast. We already learned the type `p` might refer to.
+
+  invalidation_detector = 2000;
+  p->onAction(200, invalidation_detector);
+  clang_analyzer_dump(invalidation_detector);
+  // expected-warning at -1 {{2000}} on this path we trusted the type, and resolved the call to `MyHandler::onAction(int, int&)`
+  // expected-warning at -2 {{conj}} on this path we conservatively evaluated the previous call
+
+  clang_analyzer_numTimesReached(); // expected-warning {{3}} we only have 3 paths here, not 4
+  // 1) inlined 2 different callees
+  // 2) inlined only 1st
+  // 3) non were inlined
+  // 4) inlined only the second: This can't happen because if we conservative called a specific function on a path, we will always evaluate it like that.
+  //    See ExprEngine::BifurcateCall and DynamicDispatchBifurcationMap.
+}
+
+// -------
+
+class Base {
+public:
+  virtual void OnRecvCancel(int port) = 0;
+};
+class Handler final : public Base {
+public:
+  void OnRecvCancel(int port) override {
+    clang_analyzer_dump(100 + port); // expected-warning {{+ 150}}
+    clang_analyzer_isTainted(port);  // expected-warning {{YES}}
+  }
+};
+class PParent  {
+public:
+  bool OnMessageReceived();
+};
+class Actor {
+public:
+  explicit Actor(Base* aRequest) : m(aRequest) {}
+protected:
+  Base* m;
+};
+
+class Parent : public Actor, public PParent {
+public:
+  explicit Parent(Base* aRequest) : Actor(aRequest) {}
+  bool RecvCancel(int port) {
+    clang_analyzer_dump(200 + port); // expected-warning {{+ 200}}
+    clang_analyzer_isTainted(port);  // expected-warning {{YES}}
+    Handler* foo = (Handler*)m;
+    foo->OnRecvCancel(port + 50);
+    return true;
+  }
+};
+
+bool PParent::OnMessageReceived() {
+  int port;
+  scanf("%i", &port);
+  clang_analyzer_isTainted(port); // expected-warning {{YES}}
+  Parent* foo = static_cast<Parent*>(this);
+  return foo->RecvCancel(port);
+}
diff --git a/clang/test/Analysis/osobject-retain-release.cpp b/clang/test/Analysis/osobject-retain-release.cpp
index 2ae5752f440237..b4085813a72a1a 100644
--- a/clang/test/Analysis/osobject-retain-release.cpp
+++ b/clang/test/Analysis/osobject-retain-release.cpp
@@ -492,11 +492,13 @@ void check_required_cast() {
 
 void check_cast_behavior(OSObject *obj) {
   OSArray *arr1 = OSDynamicCast(OSArray, obj);
-  clang_analyzer_eval(arr1 == obj); // expected-warning{{TRUE}}
-                                    // expected-note at -1{{TRUE}}
-                                    // expected-note at -2{{Assuming 'arr1' is not equal to 'obj'}}
-                                    // expected-warning at -3{{FALSE}}
-                                    // expected-note at -4   {{FALSE}}
+  clang_analyzer_eval(arr1 == obj); // #check_cast_behavior_1
+  // expected-warning@#check_cast_behavior_1 {{TRUE}}
+  // expected-note@#check_cast_behavior_1    {{TRUE}}
+  // expected-note@#check_cast_behavior_1{{Assuming 'arr1' is equal to 'obj'}}
+  // expected-warning@#check_cast_behavior_1 {{FALSE}}
+  // expected-note@#check_cast_behavior_1    {{FALSE}}
+  // expected-note@#check_cast_behavior_1{{Assuming 'arr1' is not equal to 'obj'}}
   OSArray *arr2 = OSRequiredCast(OSArray, obj);
   clang_analyzer_eval(arr2 == obj); // expected-warning{{TRUE}}
                                     // expected-note at -1{{TRUE}}

>From 3a0a5704903ae8120af05219d75c9448f98b957a Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 16 Oct 2023 17:53:27 +0200
Subject: [PATCH 2/5] Fix 'non' -> 'none' typo

---
 clang/test/Analysis/cast-trust-base-to-derived.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Analysis/cast-trust-base-to-derived.cpp b/clang/test/Analysis/cast-trust-base-to-derived.cpp
index e717e5617867ca..98350899229ee5 100644
--- a/clang/test/Analysis/cast-trust-base-to-derived.cpp
+++ b/clang/test/Analysis/cast-trust-base-to-derived.cpp
@@ -55,7 +55,7 @@ void trust_static_types(ActionHandler *p) {
   clang_analyzer_numTimesReached(); // expected-warning {{3}} we only have 3 paths here, not 4
   // 1) inlined 2 different callees
   // 2) inlined only 1st
-  // 3) non were inlined
+  // 3) none were inlined
   // 4) inlined only the second: This can't happen because if we conservative called a specific function on a path, we will always evaluate it like that.
   //    See ExprEngine::BifurcateCall and DynamicDispatchBifurcationMap.
 }

>From 102e70889b176580ea77893d540ccf45f3390377 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 16 Oct 2023 17:56:57 +0200
Subject: [PATCH 3/5] Demonstrate how type inference replaces the dynamic-type

---
 .../Analysis/cast-trust-base-to-derived.cpp   | 18 ++++++++
 clang/test/Analysis/cast-value-logic.cpp      | 41 ++++++++++++++++---
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/clang/test/Analysis/cast-trust-base-to-derived.cpp b/clang/test/Analysis/cast-trust-base-to-derived.cpp
index 98350899229ee5..0d374aa3d42751 100644
--- a/clang/test/Analysis/cast-trust-base-to-derived.cpp
+++ b/clang/test/Analysis/cast-trust-base-to-derived.cpp
@@ -27,6 +27,13 @@ class MyHandler final : public ActionHandler {
   }
 };
 
+class MyOtherHandler final : public ActionHandler {
+public:
+  void onAction(int x, int &) override {
+    clang_analyzer_dump(x + 3); // expected-warning {{403}}
+  }
+};
+
 void trust_static_types(ActionHandler *p) {
   // This variable will help to see if conservative call evaluation happened or not.
   int invalidation_detector;
@@ -60,6 +67,17 @@ void trust_static_types(ActionHandler *p) {
   //    See ExprEngine::BifurcateCall and DynamicDispatchBifurcationMap.
 }
 
+
+void conflicting_casts(ActionHandler *p) {
+  (void)static_cast<MyHandler *>(p);
+  (void)static_cast<MyOtherHandler *>(p);
+  int invalidation_detector = 4000;
+  p->onAction(400, invalidation_detector);
+  clang_analyzer_dump(invalidation_detector);
+  // expected-warning at -1 {{4000}}
+  // expected-warning at -2 {{conj}}
+}
+
 // -------
 
 class Base {
diff --git a/clang/test/Analysis/cast-value-logic.cpp b/clang/test/Analysis/cast-value-logic.cpp
index 52a94f24fba670..fa359f2f13d510 100644
--- a/clang/test/Analysis/cast-value-logic.cpp
+++ b/clang/test/Analysis/cast-value-logic.cpp
@@ -7,6 +7,7 @@
 void clang_analyzer_numTimesReached();
 void clang_analyzer_warnIfReached();
 void clang_analyzer_eval(bool);
+void clang_analyzer_dump(int);
 
 namespace clang {
 struct Shape {
@@ -17,20 +18,50 @@ struct Shape {
   const T *getAs() const;
 
   virtual double area();
+  virtual void print(int n) const {
+    clang_analyzer_dump(n + 1); // expected-warning {{+ 1}} this was analyzed in top-level-context
+  }
 };
-class Triangle : public Shape {};
-class Rectangle : public Shape {};
-class Hexagon : public Shape {};
-class Circle : public Shape {
+struct Triangle : public Shape {
+  void print(int n) const override {
+    clang_analyzer_dump(n + 2); // expected-warning {{+ 2}} this was analyzed in top-level-context
+  }
+};
+struct Rectangle : public Shape {
+  void print(int n) const override {
+    clang_analyzer_dump(n + 3);
+    // expected-warning at -1 {{103}} this was inlined from 'test_conflicting_type_information' 'A->print(100)'
+    // expected-warning at -2 {{303}} this was inlined from 'test_conflicting_type_information' 'R->print(300)'
+  }
+};
+struct Hexagon : public Shape {
+  void print(int n) const override {
+    clang_analyzer_dump(n + 4); // expected-warning {{+ 4}}
+  }
+};
+struct Circle : public Shape {
 public:
   ~Circle();
 };
-class SuspiciouslySpecificCircle : public Circle {};
+struct SuspiciouslySpecificCircle : public Circle {};
 } // namespace clang
 
 using namespace llvm;
 using namespace clang;
 
+void test_conflicting_type_information(const Shape *A) {
+  if (auto const *T = dyn_cast<Triangle>(A)) {
+    // Now, we associate 'Triangle' with 'A'.
+    if (auto const *R = dyn_cast<Rectangle>(A)) {
+      // Now, we associate 'Rectangle' with 'A'.
+      // This should be unreachable becasue 'A' cannot be a 'Triangle' and a 'Rectangle' at the same time.
+      A->print(100); // Splits into 2 paths: (1) conservative eval, (2) inlining `Rectangle::print`
+      T->print(200); // On path (2), it will conservative eval, because the dynamic type
+      R->print(300); // Splits on path (2) into other 2 paths: (3) conservative eval, (4) inlining `Rectangle::print`
+    }
+  }
+}
+
 void test_regions_dyn_cast(const Shape *A, const Shape *B) {
   if (dyn_cast<Circle>(A) && !dyn_cast<Circle>(B))
     clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}

>From 96729b41068d6847e34984fa289998104b06b236 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 16 Oct 2023 18:02:58 +0200
Subject: [PATCH 4/5] Move ASTCtxt back where it was originally defined

---
 clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index 6539f3c1e09e32..4e6e5e141f822e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -623,7 +623,6 @@ void DynamicTypePropagation::checkPostStmt(const CastExpr *CE,
   if (CE->getCastKind() != CK_BitCast)
     return;
 
-  ASTContext &ASTCtxt = C.getASTContext();
   QualType OriginType = CE->getSubExpr()->getType();
   QualType DestType = CE->getType();
 
@@ -633,6 +632,8 @@ void DynamicTypePropagation::checkPostStmt(const CastExpr *CE,
   if (!OrigObjectPtrType || !DestObjectPtrType)
     return;
 
+  ASTContext &ASTCtxt = C.getASTContext();
+
   // This checker detects the subtyping relationships using the assignment
   // rules. In order to be able to do this the kindofness must be stripped
   // first. The checker treats every type as kindof type anyways: when the

>From b2d9238d9b2e966b75586a9fb13b08062a18b907 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Fri, 29 Dec 2023 15:11:30 +0100
Subject: [PATCH 5/5] Fix crash on casting references

---
 .../Checkers/DynamicTypePropagation.cpp       | 19 ++++++++++--
 .../Analysis/cast-trust-base-to-derived.cpp   | 31 +++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index 323b3506b8f588..8994d5de9c0b54 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -20,6 +20,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/OperationKinds.h"
 #include "clang/AST/ParentMap.h"
 #include "clang/AST/RecursiveASTVisitor.h"
@@ -393,6 +394,19 @@ void DynamicTypePropagation::checkPostCall(const CallEvent &Call,
   }
 }
 
+// For some reason a CastExpr casting to an lvalue does not have a reference
+// type. This function adds that reference to the expr type.
+static QualType getTypeOfExpression(const Expr *E, const ASTContext &Ctx) {
+  QualType Ty = E->getType().getCanonicalType();
+  if (Ty->isPointerType() || Ty->isReferenceType())
+    return Ty;
+  if (E->isLValue())
+    return Ctx.getLValueReferenceType(Ty);
+  if (E->isXValue())
+    return Ctx.getRValueReferenceType(Ty);
+  return Ty;
+}
+
 ExplodedNode *DynamicTypePropagation::dynamicTypePropagationOnCasts(
     const CastExpr *CE, ProgramStateRef &State, CheckerContext &C) const {
   // We only track type info for regions.
@@ -402,8 +416,9 @@ ExplodedNode *DynamicTypePropagation::dynamicTypePropagationOnCasts(
 
   if (CE->getCastKind() == CK_BaseToDerived) {
     bool CastSucceeds = true;
-    QualType FromTy = CE->getSubExpr()->getType();
-    QualType ToTy = CE->getType();
+    const ASTContext &Ctx = C.getASTContext();
+    QualType FromTy = getTypeOfExpression(CE->getSubExpr(), Ctx);
+    QualType ToTy = getTypeOfExpression(CE, Ctx);
     ToR = ToR->StripCasts(/*StripBaseAndDerivedCasts=*/true);
     State = setDynamicTypeAndCastInfo(State, ToR, FromTy, ToTy, CastSucceeds);
     return C.addTransition(State);
diff --git a/clang/test/Analysis/cast-trust-base-to-derived.cpp b/clang/test/Analysis/cast-trust-base-to-derived.cpp
index 0d374aa3d42751..117680acb7c148 100644
--- a/clang/test/Analysis/cast-trust-base-to-derived.cpp
+++ b/clang/test/Analysis/cast-trust-base-to-derived.cpp
@@ -121,3 +121,34 @@ bool PParent::OnMessageReceived() {
   Parent* foo = static_cast<Parent*>(this);
   return foo->RecvCancel(port);
 }
+
+namespace BaseToDerivedRef {
+struct Base {
+  virtual int number() const { return 1; }
+};
+struct Derived : Base {
+  int number() const override { return 2; }
+};
+void top(Base &B) {
+  clang_analyzer_dump(B.number()); // expected-warning{{1}} expected-warning{{conj}}
+  Derived &&D = static_cast<Derived &&>(B);
+  clang_analyzer_dump(B.number()); // expected-warning{{2}} expected-warning{{conj}}
+  clang_analyzer_dump(D.number()); // expected-warning{{2}} expected-warning{{conj}}
+}
+} // namespace BaseToDerivedRef
+
+
+namespace BaseToDerivedPtr {
+struct Base {
+  virtual int number() const { return 1; }
+};
+struct Derived : Base {
+  int number() const override { return 2; }
+};
+void top(Base *B) {
+  clang_analyzer_dump(B->number()); // expected-warning{{1}} expected-warning{{conj}}
+  auto *D = static_cast<Derived *>(B);
+  clang_analyzer_dump(B->number()); // expected-warning{{2}} expected-warning{{conj}}
+  clang_analyzer_dump(D->number()); // expected-warning{{2}} expected-warning{{conj}}
+}
+} // namespace BaseToDerivedPtr



More information about the cfe-commits mailing list