[clang] [analyzer] Refine LCV handling in Store for better taint propagation (PR #114835)
Balazs Benics via cfe-commits
cfe-commits at lists.llvm.org
Sat Nov 9 07:06:13 PST 2024
https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/114835
>From 26f0cfabe3328c8eb8a861dd5d1d541921499f0c Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 9 Nov 2024 15:55:08 +0100
Subject: [PATCH 1/5] [analyzer][NFC] Make RegionStore dumps deterministic
Dump the memory space clusters before the other clusters, in
alphabetical order. Then default bindings over direct bindings, and if
any has symbolic offset, then those should come before the ones with
concrete offsets.
In theory, we should either have a symbolic offset OR concrete offsets,
but never both at the same time.
---
clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 86 ++++++++++++++++---
1 file changed, 73 insertions(+), 13 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 674099dd7e1f0f..6bad9a93a30169 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -67,9 +67,10 @@ class BindingKey {
isa<ObjCIvarRegion, CXXDerivedObjectRegion>(r)) &&
"Not a base");
}
-public:
+public:
bool isDirect() const { return P.getInt() & Direct; }
+ bool isDefault() const { return !isDirect(); }
bool hasSymbolicOffset() const { return P.getInt() & Symbolic; }
const MemRegion *getRegion() const { return P.getPointer(); }
@@ -232,27 +233,86 @@ class RegionBindingsRef : public llvm::ImmutableMapRef<const MemRegion *,
void printJson(raw_ostream &Out, const char *NL = "\n",
unsigned int Space = 0, bool IsDot = false) const {
- for (iterator I = begin(), E = end(); I != E; ++I) {
- // TODO: We might need a .printJson for I.getKey() as well.
+ using namespace llvm;
+ DenseMap<const MemRegion *, std::string> StringifyCache;
+ auto ToString = [&StringifyCache](const MemRegion *R) {
+ auto [Place, Inserted] = StringifyCache.try_emplace(R);
+ if (!Inserted)
+ return Place->second;
+ std::string Res;
+ raw_string_ostream OS(Res);
+ OS << R;
+ Place->second = Res;
+ return Res;
+ };
+
+ using Cluster =
+ std::pair<const MemRegion *, ImmutableMap<BindingKey, SVal>>;
+ using Binding = std::pair<BindingKey, SVal>;
+
+ const auto MemSpaceBeforeRegionName = [&ToString](const Cluster *L,
+ const Cluster *R) {
+ if (isa<MemSpaceRegion>(L->first) && !isa<MemSpaceRegion>(R->first))
+ return true;
+ if (!isa<MemSpaceRegion>(L->first) && isa<MemSpaceRegion>(R->first))
+ return false;
+ return ToString(L->first) < ToString(R->first);
+ };
+
+ const auto SymbolicBeforeOffset = [&ToString](const BindingKey &L,
+ const BindingKey &R) {
+ if (L.hasSymbolicOffset() && !R.hasSymbolicOffset())
+ return true;
+ if (!L.hasSymbolicOffset() && R.hasSymbolicOffset())
+ return false;
+ if (L.hasSymbolicOffset() && R.hasSymbolicOffset())
+ return ToString(L.getRegion()) < ToString(R.getRegion());
+ return L.getOffset() < R.getOffset();
+ };
+
+ const auto DefaultBindingBeforeDirectBindings =
+ [&SymbolicBeforeOffset](const Binding *LPtr, const Binding *RPtr) {
+ const BindingKey &L = LPtr->first;
+ const BindingKey &R = RPtr->first;
+ if (L.isDefault() && !R.isDefault())
+ return true;
+ if (!L.isDefault() && R.isDefault())
+ return false;
+ assert(L.isDefault() == R.isDefault());
+ return SymbolicBeforeOffset(L, R);
+ };
+
+ const auto AddrOf = [](const auto &Item) { return &Item; };
+
+ std::vector<const Cluster *> SortedClusters;
+ SortedClusters.reserve(std::distance(begin(), end()));
+ append_range(SortedClusters, map_range(*this, AddrOf));
+ llvm::sort(SortedClusters, MemSpaceBeforeRegionName);
+
+ for (auto [Idx, C] : llvm::enumerate(SortedClusters)) {
+ const auto &[BaseRegion, Bindings] = *C;
Indent(Out, Space, IsDot)
- << "{ \"cluster\": \"" << I.getKey() << "\", \"pointer\": \""
- << (const void *)I.getKey() << "\", \"items\": [" << NL;
+ << "{ \"cluster\": \"" << BaseRegion << "\", \"pointer\": \""
+ << (const void *)BaseRegion << "\", \"items\": [" << NL;
+
+ std::vector<const Binding *> SortedBindings;
+ SortedBindings.reserve(std::distance(Bindings.begin(), Bindings.end()));
+ append_range(SortedBindings, map_range(Bindings, AddrOf));
+ llvm::sort(SortedBindings, DefaultBindingBeforeDirectBindings);
++Space;
- const ClusterBindings &CB = I.getData();
- for (ClusterBindings::iterator CI = CB.begin(), CE = CB.end(); CI != CE;
- ++CI) {
- Indent(Out, Space, IsDot) << "{ " << CI.getKey() << ", \"value\": ";
- CI.getData().printJson(Out, /*AddQuotes=*/true);
+ for (auto [Idx, B] : llvm::enumerate(SortedBindings)) {
+ const auto &[Key, Value] = *B;
+ Indent(Out, Space, IsDot) << "{ " << Key << ", \"value\": ";
+ Value.printJson(Out, /*AddQuotes=*/true);
Out << " }";
- if (std::next(CI) != CE)
+ if (Idx != SortedBindings.size() - 1)
Out << ',';
Out << NL;
}
-
--Space;
Indent(Out, Space, IsDot) << "]}";
- if (std::next(I) != E)
+ if (Idx != SortedClusters.size() - 1)
Out << ',';
Out << NL;
}
>From efa54ca20448e3af84dc9ea5d5bb9dbfbf021ed9 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 2 Nov 2024 14:13:00 +0100
Subject: [PATCH 2/5] [analyzer] Allow copying empty structs (1/4)
We represent copies of structs by LazyCompoundVals, that is basically a
snapshot of the Store and Region that your copy would refer to.
This snapshot is actually not taken for empty structs (structs that have
no non-static data members), because the users won't be able to access
any fields anyways, so why bother.
However, when it comes to taint propagation, it would be nice if
instances of empty structs would behave similar to non-empty structs.
For this, we need an identity for which taint can bind, so Unknown -
that was used in the past wouldn't work.
Consequently, copying the value of an empty struct should behave the
same way as a non-empty struct, thus be represented by a
LazyCompoundVal.
---
clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 14 +--
clang/test/Analysis/ctor-trivial-copy.cpp | 101 ++++++++++++++++--
2 files changed, 99 insertions(+), 16 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 6bad9a93a30169..f46282a73fbe40 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -2336,20 +2336,16 @@ NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B,
return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R);
}
-static bool isRecordEmpty(const RecordDecl *RD) {
- if (!RD->field_empty())
- return false;
- if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD))
- return CRD->getNumBases() == 0;
- return true;
-}
-
SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
const TypedValueRegion *R) {
const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl();
- if (!RD->getDefinition() || isRecordEmpty(RD))
+ if (!RD->getDefinition())
return UnknownVal();
+ // We also create a LCV for copying empty structs because then the store
+ // behavior doesn't depend on the struct layout.
+ // This way even an empty struct can carry taint, no matter if creduce drops
+ // the last field member or not.
return createLazyBinding(B, R);
}
diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp
index 5ed188aa8f1eae..a1209c24136e20 100644
--- a/clang/test/Analysis/ctor-trivial-copy.cpp
+++ b/clang/test/Analysis/ctor-trivial-copy.cpp
@@ -1,8 +1,12 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s \
+// RUN: 2>&1 | FileCheck %s
-template<typename T>
-void clang_analyzer_dump(T&);
+void clang_analyzer_printState();
+template<typename T> void clang_analyzer_dump_lref(T&);
+template<typename T> void clang_analyzer_dump_val(T);
+template <typename T> T conjure();
+template <typename... Ts> void nop(const Ts &... args) {}
struct aggr {
int x;
@@ -15,20 +19,103 @@ struct empty {
void test_copy_return() {
aggr s1 = {1, 2};
aggr const& cr1 = aggr(s1);
- clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }}
+ clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }}
empty s2;
empty const& cr2 = empty{s2};
- clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }}
+ clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }}
}
void test_assign_return() {
aggr s1 = {1, 2};
aggr d1;
- clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }}
+ clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }}
empty s2;
empty d2;
- clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown
+ clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown
}
+
+namespace trivial_struct_copy {
+
+void _01_empty_structs() {
+ clang_analyzer_dump_val(conjure<empty>()); // expected-warning {{lazyCompoundVal}}
+ empty Empty = conjure<empty>();
+ empty Empty2 = Empty;
+ empty Empty3 = Empty2;
+ // All of these should refer to the exact same LCV, because all of
+ // these trivial copies refer to the original conjured value.
+ // There were Unknown before:
+ clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}}
+ clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}}
+ clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}}
+
+ // Notice that we don't have entries for the copies, only for the original "Empty" object.
+ // That binding was added by the opaque "conjure" call, directly constructing to the variable "Empty" by copy-elision.
+ // The copies are not present because the ExprEngine skips the evalBind of empty structs.
+ // And even if such binds would reach the Store, the Store copies small structs by copying the individual fields,
+ // and there are no fields within "Empty", thus we wouldn't have any entries anyways.
+ clang_analyzer_printState();
+ // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
+ // CHECK-NEXT: ]}
+ // CHECK-NEXT: ]},
+
+ nop(Empty, Empty2, Empty3);
+}
+
+void _02_structs_with_members() {
+ clang_analyzer_dump_val(conjure<aggr>()); // expected-warning {{lazyCompoundVal}}
+ aggr Aggr = conjure<aggr>();
+ aggr Aggr2 = Aggr;
+ aggr Aggr3 = Aggr2;
+ // All of these should refer to the exact same LCV, because all of
+ // these trivial copies refer to the original conjured value.
+ clang_analyzer_dump_val(Aggr); // expected-warning {{lazyCompoundVal}}
+ clang_analyzer_dump_val(Aggr2); // expected-warning {{lazyCompoundVal}}
+ clang_analyzer_dump_val(Aggr3); // expected-warning {{lazyCompoundVal}}
+
+ // We have fields in the struct we copy, thus we also have the entries for the copies
+ // (and for all of their fields).
+ clang_analyzer_printState();
+ // CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Aggr", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Aggr2", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Direct", "offset": 0, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.x}" },
+ // CHECK-NEXT: { "kind": "Direct", "offset": 32, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.y}" }
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Aggr3", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Direct", "offset": 0, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.x}" },
+ // CHECK-NEXT: { "kind": "Direct", "offset": 32, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.y}" }
+ // CHECK-NEXT: ]}
+ // CHECK-NEXT: ]},
+
+ nop(Aggr, Aggr2, Aggr3);
+}
+
+// Tests that use `clang_analyzer_printState()` must share the analysis entry
+// point, and have a strict ordering between. This is to meet the different
+// `clang_analyzer_printState()` calls in a fixed relative ordering, thus
+// FileCheck could check the stdouts.
+void entrypoint() {
+ _01_empty_structs();
+ _02_structs_with_members();
+}
+
+} // namespace trivial_struct_copy
>From 2027109450bf30ec9084c01977b5bf80a11062aa Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sun, 3 Nov 2024 09:41:33 +0100
Subject: [PATCH 3/5] [analyzer] Don't copy field-by-field conjured
LazyCompoundVals (2/4)
---
clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 39 +++++++++++++++++++
clang/test/Analysis/ctor-trivial-copy.cpp | 11 +++---
2 files changed, 44 insertions(+), 6 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index f46282a73fbe40..b54ba43ff42414 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -608,6 +608,12 @@ class RegionStoreManager : public StoreManager {
return getBinding(getRegionBindings(S), L, T);
}
+ /// Returns the value of the default binding of region \p BaseR
+ /// if and only if that is the unique binding in the cluster of \p BaseR.
+ /// \p BaseR must be a base region.
+ std::optional<SVal> getUniqueDefaultBinding(Store S,
+ const MemRegion *BaseR) const;
+
std::optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
// Default bindings are always applied over a base region so look up the
@@ -2605,9 +2611,42 @@ RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B,
return NewB;
}
+std::optional<SVal>
+RegionStoreManager::getUniqueDefaultBinding(Store S,
+ const MemRegion *BaseR) const {
+ assert(BaseR == BaseR->getBaseRegion() && "Expecting a base region");
+ const auto *Cluster = getRegionBindings(S).lookup(BaseR);
+ if (!Cluster || !llvm::hasSingleElement(*Cluster))
+ return std::nullopt;
+
+ const auto [Key, Value] = *Cluster->begin();
+ return Key.isDirect() ? std::optional<SVal>{} : Value;
+}
+
std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallStruct(
RegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD,
nonloc::LazyCompoundVal LCV) {
+ // If we try to copy a Conjured value representing the value of the whole
+ // struct, don't try to element-wise copy each field.
+ // That would unnecessarily bind Derived symbols slicing off the subregion for
+ // the field from the whole Conjured symbol.
+ //
+ // struct Window { int width; int height; };
+ // Window getWindow(); <-- opaque fn.
+ // Window w = getWindow(); <-- conjures a new Window.
+ // Window w2 = w; <-- trivial copy "w", calling "tryBindSmallStruct"
+ //
+ // We should not end up with a new Store for "w2" like this:
+ // Direct [ 0..31]: Derived{Conj{}, w.width}
+ // Direct [32..63]: Derived{Conj{}, w.height}
+ // Instead, we should just bind that Conjured value instead.
+ if (LCV.getRegion()->getBaseRegion() == LCV.getRegion()) {
+ if (auto Val = getUniqueDefaultBinding(LCV.getStore(), LCV.getRegion())) {
+ return B.addBinding(BindingKey::Make(R, BindingKey::Default),
+ Val.value());
+ }
+ }
+
FieldVector Fields;
if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD))
diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp
index a1209c24136e20..ab623c1919e152 100644
--- a/clang/test/Analysis/ctor-trivial-copy.cpp
+++ b/clang/test/Analysis/ctor-trivial-copy.cpp
@@ -83,8 +83,9 @@ void _02_structs_with_members() {
clang_analyzer_dump_val(Aggr2); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Aggr3); // expected-warning {{lazyCompoundVal}}
- // We have fields in the struct we copy, thus we also have the entries for the copies
- // (and for all of their fields).
+ // We should have the same Conjured symbol for "Aggr", "Aggr2" and "Aggr3".
+ // We used to have Derived symbols for the individual fields that were
+ // copied as part of copying the whole struct.
clang_analyzer_printState();
// CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
@@ -97,12 +98,10 @@ void _02_structs_with_members() {
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Aggr2", "pointer": "0x{{[0-9a-f]+}}", "items": [
- // CHECK-NEXT: { "kind": "Direct", "offset": 0, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.x}" },
- // CHECK-NEXT: { "kind": "Direct", "offset": 32, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.y}" }
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Aggr3", "pointer": "0x{{[0-9a-f]+}}", "items": [
- // CHECK-NEXT: { "kind": "Direct", "offset": 0, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.x}" },
- // CHECK-NEXT: { "kind": "Direct", "offset": 32, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.y}" }
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
// CHECK-NEXT: ]}
// CHECK-NEXT: ]},
>From c6d0aed9cf07522734cb1135868d168811834e6d Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 2 Nov 2024 14:15:41 +0100
Subject: [PATCH 4/5] [analyzer] Trigger copy event when copying empty structs
(3/4)
Previously, ExprEngine would just skip copying empty structs.
Let's make trigger the copy event even for empty structs.
---
.../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 26 +++++++------------
clang/test/Analysis/ctor-trivial-copy.cpp | 12 +++++----
2 files changed, 16 insertions(+), 22 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index ccc3097e8d2f97..17ee1f7c945edd 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -68,23 +68,15 @@ void ExprEngine::performTrivialCopy(NodeBuilder &Bldr, ExplodedNode *Pred,
Bldr.takeNodes(Pred);
assert(ThisRD);
- if (!ThisRD->isEmpty()) {
- // Load the source value only for non-empty classes.
- // Otherwise it'd retrieve an UnknownVal
- // and bind it and RegionStore would think that the actual value
- // in this region at this offset is unknown.
- SVal V = Call.getArgSVal(0);
-
- // If the value being copied is not unknown, load from its location to get
- // an aggregate rvalue.
- if (std::optional<Loc> L = V.getAs<Loc>())
- V = Pred->getState()->getSVal(*L);
- else
- assert(V.isUnknownOrUndef());
- evalBind(Dst, CallExpr, Pred, ThisVal, V, true);
- } else {
- Dst.Add(Pred);
- }
+ SVal V = Call.getArgSVal(0);
+
+ // If the value being copied is not unknown, load from its location to get
+ // an aggregate rvalue.
+ if (std::optional<Loc> L = V.getAs<Loc>())
+ V = Pred->getState()->getSVal(*L);
+ else
+ assert(V.isUnknownOrUndef());
+ evalBind(Dst, CallExpr, Pred, ThisVal, V, true);
PostStmt PS(CallExpr, LCtx);
for (ExplodedNode *N : Dst) {
diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp
index ab623c1919e152..41d0d97161bba1 100644
--- a/clang/test/Analysis/ctor-trivial-copy.cpp
+++ b/clang/test/Analysis/ctor-trivial-copy.cpp
@@ -51,11 +51,7 @@ void _01_empty_structs() {
clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}}
- // Notice that we don't have entries for the copies, only for the original "Empty" object.
- // That binding was added by the opaque "conjure" call, directly constructing to the variable "Empty" by copy-elision.
- // The copies are not present because the ExprEngine skips the evalBind of empty structs.
- // And even if such binds would reach the Store, the Store copies small structs by copying the individual fields,
- // and there are no fields within "Empty", thus we wouldn't have any entries anyways.
+ // We should have the same Conjured symbol for "Empty", "Empty2" and "Empty3".
clang_analyzer_printState();
// CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
@@ -66,6 +62,12 @@ void _01_empty_structs() {
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Empty2", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" }
+ // CHECK-NEXT: ]},
+ // CHECK-NEXT: { "cluster": "Empty3", "pointer": "0x{{[0-9a-f]+}}", "items": [
+ // CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ]]" }
// CHECK-NEXT: ]}
// CHECK-NEXT: ]},
>From a8e603bf7642711c6d7f9994d7f0e111bb33ab12 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 2 Nov 2024 19:25:46 +0100
Subject: [PATCH 5/5] [analyzer][taint] Recognize tainted LazyCompoundVals
(4/4)
Taint propagation rules may want to taint whole objects, that are
returned by-value from opaque function calls.
If a struct is returned by-value from an opaque call, the "value" of the
whole struct is represented by a Conjured symbol.
Later fields may slice off smaller subregions by creating Derived
symbols of that Conjured symbol, but those are handled well, and
"isTainted" returns true as expected.
However, passing the whole struct to "isTainted" would be false, because
LazyCompoundVals and CompoundVals are not handled.
This patch addresses this.
Fixes #114270
---
clang/lib/StaticAnalyzer/Checkers/Taint.cpp | 8 ++++++
clang/test/Analysis/taint-generic.cpp | 29 +++++++++++++++++++--
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index 0bb5739db4b756..e55d064253b844 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -207,6 +207,14 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly);
if (const MemRegion *Reg = V.getAsRegion())
return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly);
+
+ if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+ StoreManager &StoreMgr = State->getStateManager().getStoreManager();
+ if (auto DefaultVal = StoreMgr.getDefaultBinding(*LCV)) {
+ return getTaintedSymbolsImpl(State, *DefaultVal, Kind, returnFirstOnly);
+ }
+ }
+
return {};
}
diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp
index 8092ac6f270b2a..881c5baf889f6c 100644
--- a/clang/test/Analysis/taint-generic.cpp
+++ b/clang/test/Analysis/taint-generic.cpp
@@ -1,10 +1,15 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,alpha.security.ArrayBoundV2 -analyzer-config optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify -std=c++11 %s
+// RUN: %clang_analyze_cc1 -std=c++11 -Wno-format-security \
+// RUN: -analyzer-checker=core,optin.taint,alpha.security.ArrayBoundV2,debug.ExprInspection \
+// RUN: -analyzer-config optin.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml \
+// RUN: -verify %s
+
+template <typename T> void clang_analyzer_isTainted(T);
#define BUFSIZE 10
int Buffer[BUFSIZE];
int scanf(const char*, ...);
-int mySource1();
+template <typename T = int> T mySource1();
int mySource3();
typedef struct _FILE FILE;
@@ -136,3 +141,23 @@ void testReadingFromStdin(char **p) {
fscanf(stdin, "%d", &n);
Buffer[n] = 1; // expected-warning {{Potential out of bound access }}
}
+
+namespace gh114270 {
+class Empty {};
+class Aggr {
+public:
+ int data;
+};
+
+void top() {
+ int Int = mySource1<int>();
+ clang_analyzer_isTainted(Int); // expected-warning {{YES}}
+
+ Empty E = mySource1<Empty>();
+ clang_analyzer_isTainted(E); // expected-warning {{YES}}
+
+ Aggr A = mySource1<Aggr>();
+ clang_analyzer_isTainted(A); // expected-warning {{YES}}
+ clang_analyzer_isTainted(A.data); // expected-warning {{YES}}
+}
+} // namespace gh114270
More information about the cfe-commits
mailing list