[clang] a80418e - [analyzer] Improve loads from reinterpret-cast fields
Balazs Benics via cfe-commits
cfe-commits at lists.llvm.org
Tue Jul 26 03:31:50 PDT 2022
Author: Balazs Benics
Date: 2022-07-26T12:31:21+02:00
New Revision: a80418eec001d91f9573456b31704a350e421560
URL: https://github.com/llvm/llvm-project/commit/a80418eec001d91f9573456b31704a350e421560
DIFF: https://github.com/llvm/llvm-project/commit/a80418eec001d91f9573456b31704a350e421560.diff
LOG: [analyzer] Improve loads from reinterpret-cast fields
Consider this example:
```lang=C++
struct header {
unsigned a : 1;
unsigned b : 1;
};
struct parse_t {
unsigned bits0 : 1;
unsigned bits2 : 2; // <-- header
unsigned bits4 : 4;
};
int parse(parse_t *p) {
unsigned copy = p->bits2;
clang_analyzer_dump(copy);
// expected-warning at -1 {{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>}}
header *bits = (header *)©
clang_analyzer_dump(bits->b); // <--- Was UndefinedVal previously.
// expected-warning at -1 {{derived_$2{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
return bits->b; // no-warning: it's not UndefinedVal
}
```
`bits->b` should have the same content as the second bit of `p->bits2`
(assuming that the bitfields are in spelling order).
---
The `Store` has the correct bindings. The problem is with the load of `bits->b`.
It will eventually reach `RegionStoreManager::getBindingForField()` with
`Element{copy,0 S64b,struct header}.b`, which is a `FieldRegion`.
It did not find any direct bindings, so the `getBindingForFieldOrElementCommon()`
gets called. That won't find any bindings, but it sees that the variable
is on the //stack//, thus it must be an uninitialized local variable;
thus it returns `UndefinedVal`.
Instead of doing this, it should have created a //derived symbol//
representing the slice of the region corresponding to the member.
So, if the value of `copy` is `reg1`, then the value of `bits->b` should
be `derived{reg1, elem{copy,0, header}.b}`.
Actually, the `getBindingForElement()` already does exactly this for
reinterpret-casts, so I decided to hoist that and reuse the logic.
Fixes #55934
Reviewed By: martong
Differential Revision: https://reviews.llvm.org/D128535
Added:
Modified:
clang/lib/StaticAnalyzer/Core/RegionStore.cpp
clang/test/Analysis/ptr-arith.cpp
Removed:
################################################################################
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 5e946483a93d..d8ece9f39a25 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -1888,6 +1888,30 @@ SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL,
return svalBuilder.makeIntVal(Code, ElemT);
}
+static Optional<SVal> getDerivedSymbolForBinding(
+ RegionBindingsConstRef B, const TypedValueRegion *BaseRegion,
+ const TypedValueRegion *SubReg, const ASTContext &Ctx, SValBuilder &SVB) {
+ assert(BaseRegion);
+ QualType BaseTy = BaseRegion->getValueType();
+ QualType Ty = SubReg->getValueType();
+ if (BaseTy->isScalarType() && Ty->isScalarType()) {
+ if (Ctx.getTypeSizeInChars(BaseTy) >= Ctx.getTypeSizeInChars(Ty)) {
+ if (const Optional<SVal> &ParentValue = B.getDirectBinding(BaseRegion)) {
+ if (SymbolRef ParentValueAsSym = ParentValue->getAsSymbol())
+ return SVB.getDerivedRegionValueSymbolVal(ParentValueAsSym, SubReg);
+
+ if (ParentValue->isUndef())
+ return UndefinedVal();
+
+ // Other cases: give up. We are indexing into a larger object
+ // that has some value, but we don't know how to handle that yet.
+ return UnknownVal();
+ }
+ }
+ }
+ return None;
+}
+
SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
const ElementRegion* R) {
// Check if the region has a binding.
@@ -1932,27 +1956,10 @@ SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
if (!O.getRegion())
return UnknownVal();
- if (const TypedValueRegion *baseR =
- dyn_cast_or_null<TypedValueRegion>(O.getRegion())) {
- QualType baseT = baseR->getValueType();
- if (baseT->isScalarType()) {
- QualType elemT = R->getElementType();
- if (elemT->isScalarType()) {
- if (Ctx.getTypeSizeInChars(baseT) >= Ctx.getTypeSizeInChars(elemT)) {
- if (const Optional<SVal> &V = B.getDirectBinding(superR)) {
- if (SymbolRef parentSym = V->getAsSymbol())
- return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
-
- if (V->isUnknownOrUndef())
- return *V;
- // Other cases: give up. We are indexing into a larger object
- // that has some value, but we don't know how to handle that yet.
- return UnknownVal();
- }
- }
- }
- }
- }
+ if (const TypedValueRegion *baseR = dyn_cast<TypedValueRegion>(O.getRegion()))
+ if (auto V = getDerivedSymbolForBinding(B, baseR, R, Ctx, svalBuilder))
+ return *V;
+
return getBindingForFieldOrElementCommon(B, R, R->getElementType());
}
@@ -1988,6 +1995,26 @@ SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
}
}
+ // Handle the case where we are accessing into a larger scalar object.
+ // For example, this handles:
+ // struct header {
+ // unsigned a : 1;
+ // unsigned b : 1;
+ // };
+ // struct parse_t {
+ // unsigned bits0 : 1;
+ // unsigned bits2 : 2; // <-- header
+ // unsigned bits4 : 4;
+ // };
+ // int parse(parse_t *p) {
+ // unsigned copy = p->bits2;
+ // header *bits = (header *)©
+ // return bits->b; <-- here
+ // }
+ if (const auto *Base = dyn_cast<TypedValueRegion>(R->getBaseRegion()))
+ if (auto V = getDerivedSymbolForBinding(B, Base, R, Ctx, svalBuilder))
+ return *V;
+
return getBindingForFieldOrElementCommon(B, R, Ty);
}
diff --git a/clang/test/Analysis/ptr-arith.cpp b/clang/test/Analysis/ptr-arith.cpp
index 1eec83c643e1..a8d90fa63b29 100644
--- a/clang/test/Analysis/ptr-arith.cpp
+++ b/clang/test/Analysis/ptr-arith.cpp
@@ -1,4 +1,7 @@
// RUN: %clang_analyze_cc1 -Wno-unused-value -std=c++14 -analyzer-checker=core,debug.ExprInspection,alpha.core.PointerArithm -verify %s
+
+template <typename T> void clang_analyzer_dump(T);
+
struct X {
int *p;
int zero;
@@ -117,3 +120,24 @@ bool integerAsPtrSubtractionNoCrash(char *p, __UINTPTR_TYPE__ m) {
auto n = p - reinterpret_cast<char*>((__UINTPTR_TYPE__)1);
return n == m;
}
+
+namespace Bug_55934 {
+struct header {
+ unsigned a : 1;
+ unsigned b : 1;
+};
+struct parse_t {
+ unsigned bits0 : 1;
+ unsigned bits2 : 2; // <-- header
+ unsigned bits4 : 4;
+};
+int parse(parse_t *p) {
+ unsigned copy = p->bits2;
+ clang_analyzer_dump(copy);
+ // expected-warning at -1 {{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>}}
+ header *bits = (header *)©
+ clang_analyzer_dump(bits->b);
+ // expected-warning at -1 {{derived_$2{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
+ return bits->b; // no-warning
+}
+} // namespace Bug_55934
More information about the cfe-commits
mailing list