[clang] [analyzer][taint] Recognize tainted LazyCompoundVals (4/4) (PR #115919)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 14 07:32:12 PST 2024


https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/115919

>From 697e09d0bc97230240b3d127a310e49ddd4d44b7 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 2 Nov 2024 14:15:41 +0100
Subject: [PATCH 1/2] [analyzer] Trigger copy event when copying empty structs
 (3/4)

Previously, ExprEngine would just skip copying empty structs.
Let's make trigger the copy event even for empty structs.
---
 .../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 26 +++++++------------
 clang/test/Analysis/ctor-trivial-copy.cpp     | 12 +++++----
 2 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index ccc3097e8d2f97..17ee1f7c945edd 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -68,23 +68,15 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred,
   Bldr.takeNodes(Pred);
 
   assert(ThisRD);
-  if (!ThisRD->isEmpty()) {
-    // Load the source value only for non-empty classes.
-    // Otherwise it'd retrieve an UnknownVal
-    // and bind it and RegionStore would think that the actual value
-    // in this region at this offset is unknown.
-    SVal V = Call.getArgSVal(0);
-
-    // If the value being copied is not unknown, load from its location to get
-    // an aggregate rvalue.
-    if (std::optional<Loc> L = V.getAs<Loc>())
-      V = Pred->getState()->getSVal(*L);
-    else
-      assert(V.isUnknownOrUndef());
-    evalBind(Dst, CallExpr, Pred, ThisVal, V, true);
-  } else {
-    Dst.Add(Pred);
-  }
+  SVal V = Call.getArgSVal(0);
+
+  // If the value being copied is not unknown, load from its location to get
+  // an aggregate rvalue.
+  if (std::optional<Loc> L = V.getAs<Loc>())
+    V = Pred->getState()->getSVal(*L);
+  else
+    assert(V.isUnknownOrUndef());
+  evalBind(Dst, CallExpr, Pred, ThisVal, V, true);
 
   PostStmt PS(CallExpr, LCtx);
   for (ExplodedNode *N : Dst) {
diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp
index ab623c1919e152..41d0d97161bba1 100644
--- a/clang/test/Analysis/ctor-trivial-copy.cpp
+++ b/clang/test/Analysis/ctor-trivial-copy.cpp
@@ -51,11 +51,7 @@ void _01_empty_structs() {
   clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}}
   clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}}
 
-  // Notice that we don't have entries for the copies, only for the original "Empty" object.
-  // That binding was added by the opaque "conjure" call, directly constructing to the variable "Empty" by copy-elision.
-  // The copies are not present because the ExprEngine skips the evalBind of empty structs.
-  // And even if such binds would reach the Store, the Store copies small structs by copying the individual fields,
-  // and there are no fields within "Empty", thus we wouldn't have any entries anyways.
+  // We should have the same Conjured symbol for "Empty", "Empty2" and "Empty3".
   clang_analyzer_printState();
   // CHECK:       "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
   // CHECK-NEXT:    { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
@@ -66,6 +62,12 @@ void _01_empty_structs() {
   // CHECK-NEXT:    ]},
   // CHECK-NEXT:    { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [
   // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
+  // CHECK-NEXT:    ]},
+  // CHECK-NEXT:    { "cluster": "Empty2", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" }
+  // CHECK-NEXT:    ]},
+  // CHECK-NEXT:    { "cluster": "Empty3", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" }
   // CHECK-NEXT:    ]}
   // CHECK-NEXT:  ]},
 

>From 00c5d68cf2b840010dee3ed62c9c4fadc5872c8f Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 2 Nov 2024 19:25:46 +0100
Subject: [PATCH 2/2] [analyzer][taint] Recognize tainted LazyCompoundVals
 (4/4)

Taint propagation rules may want to taint whole objects, that are
returned by-value from opaque function calls.
If a struct is returned by-value from an opaque call, the "value" of the
whole struct is represented by a Conjured symbol.
Later fields may slice off smaller subregions by creating Derived
symbols of that Conjured symbol, but those are handled well, and
"isTainted" returns true as expected.

However, passing the whole struct to "isTainted" would be false, because
LazyCompoundVals and CompoundVals are not handled.
This patch addresses this.

Fixes #114270
---
 clang/lib/StaticAnalyzer/Checkers/Taint.cpp |  8 ++++++
 clang/test/Analysis/taint-generic.cpp       | 29 +++++++++++++++++++--
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index 0bb5739db4b756..e55d064253b844 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -207,6 +207,14 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
     return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
   if (const MemRegion *Reg = V.getAsRegion())
     return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);
+
+  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+    StoreManager &StoreMgr = State->getStateManager().getStoreManager();
+    if (auto DefaultVal = StoreMgr.getDefaultBinding(*LCV)) {
+      return getTaintedSymbolsImpl(State, *DefaultVal, Kind, returnFirstOnly);
+    }
+  }
+
   return {};
 }
 
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index 8092ac6f270b2a..881c5baf889f6c 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -1,10 +1,15 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=optin.taint,core,alpha.security.ArrayBoundV2 -analyzer-config optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify -std=c++11 %s
+// RUN: %clang_analyze_cc1 -std=c++11 -Wno-format-security \
+// RUN:   -analyzer-checker=core,optin.taint,alpha.security.ArrayBoundV2,debug.ExprInspection \
+// RUN:   -analyzer-config optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml \
+// RUN:   -verify %s
+
+template <typename T> void clang_analyzer_isTainted(T);
 
 #define BUFSIZE 10
 int Buffer[BUFSIZE];
 
 int scanf(const char*, ...);
-int mySource1();
+template <typename T = int> T mySource1();
 int mySource3();
 
 typedef struct _FILE FILE;
@@ -136,3 +141,23 @@ void testReadingFromStdin(char **p) {
   fscanf(stdin, "%d", &n);
   Buffer[n] = 1; // expected-warning {{Potential out of bound access }}
 }
+
+namespace gh114270 {
+class Empty {};
+class Aggr {
+public:
+  int data;
+};
+
+void top() {
+  int Int = mySource1<int>();
+  clang_analyzer_isTainted(Int); // expected-warning {{YES}}
+
+  Empty E = mySource1<Empty>();
+  clang_analyzer_isTainted(E); // expected-warning {{YES}}
+
+  Aggr A = mySource1<Aggr>();
+  clang_analyzer_isTainted(A);      // expected-warning {{YES}}
+  clang_analyzer_isTainted(A.data); // expected-warning {{YES}}
+}
+} // namespace gh114270



More information about the cfe-commits mailing list