[clang] a80418e - [analyzer] Improve loads from reinterpret-cast fields

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 26 03:31:50 PDT 2022


Author: Balazs Benics
Date: 2022-07-26T12:31:21+02:00
New Revision: a80418eec001d91f9573456b31704a350e421560

URL: https://github.com/llvm/llvm-project/commit/a80418eec001d91f9573456b31704a350e421560
DIFF: https://github.com/llvm/llvm-project/commit/a80418eec001d91f9573456b31704a350e421560.diff

LOG: [analyzer] Improve loads from reinterpret-cast fields

Consider this example:

```lang=C++
struct header {
  unsigned a : 1;
  unsigned b : 1;
};
struct parse_t {
  unsigned bits0 : 1;
  unsigned bits2 : 2; // <-- header
  unsigned bits4 : 4;
};
int parse(parse_t *p) {
  unsigned copy = p->bits2;
  clang_analyzer_dump(copy);
  // expected-warning at -1 {{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>}}

  header *bits = (header *)©
  clang_analyzer_dump(bits->b); // <--- Was UndefinedVal previously.
  // expected-warning at -1 {{derived_$2{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
  return bits->b; // no-warning: it's not UndefinedVal
}
```

`bits->b` should have the same content as the second bit of `p->bits2`
(assuming that the bitfields are in spelling order).

---

The `Store` has the correct bindings. The problem is with the load of `bits->b`.
It will eventually reach `RegionStoreManager::getBindingForField()` with
`Element{copy,0 S64b,struct header}.b`, which is a `FieldRegion`.
It did not find any direct bindings, so the `getBindingForFieldOrElementCommon()`
gets called. That won't find any bindings, but it sees that the variable
is on the //stack//, thus it must be an uninitialized local variable;
thus it returns `UndefinedVal`.

Instead of doing this, it should have created a //derived symbol//
representing the slice of the region corresponding to the member.
So, if the value of `copy` is `reg1`, then the value of `bits->b` should
be `derived{reg1, elem{copy,0, header}.b}`.

Actually, the `getBindingForElement()` already does exactly this for
reinterpret-casts, so I decided to hoist that and reuse the logic.

Fixes #55934

Reviewed By: martong

Differential Revision: https://reviews.llvm.org/D128535

Added: 
    

Modified: 
    clang/lib/StaticAnalyzer/Core/RegionStore.cpp
    clang/test/Analysis/ptr-arith.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 5e946483a93d..d8ece9f39a25 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -1888,6 +1888,30 @@ SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL,
   return svalBuilder.makeIntVal(Code, ElemT);
 }
 
+static Optional<SVal> getDerivedSymbolForBinding(
+    RegionBindingsConstRef B, const TypedValueRegion *BaseRegion,
+    const TypedValueRegion *SubReg, const ASTContext &Ctx, SValBuilder &SVB) {
+  assert(BaseRegion);
+  QualType BaseTy = BaseRegion->getValueType();
+  QualType Ty = SubReg->getValueType();
+  if (BaseTy->isScalarType() && Ty->isScalarType()) {
+    if (Ctx.getTypeSizeInChars(BaseTy) >= Ctx.getTypeSizeInChars(Ty)) {
+      if (const Optional<SVal> &ParentValue = B.getDirectBinding(BaseRegion)) {
+        if (SymbolRef ParentValueAsSym = ParentValue->getAsSymbol())
+          return SVB.getDerivedRegionValueSymbolVal(ParentValueAsSym, SubReg);
+
+        if (ParentValue->isUndef())
+          return UndefinedVal();
+
+        // Other cases: give up.  We are indexing into a larger object
+        // that has some value, but we don't know how to handle that yet.
+        return UnknownVal();
+      }
+    }
+  }
+  return None;
+}
+
 SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
                                               const ElementRegion* R) {
   // Check if the region has a binding.
@@ -1932,27 +1956,10 @@ SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
   if (!O.getRegion())
     return UnknownVal();
 
-  if (const TypedValueRegion *baseR =
-        dyn_cast_or_null<TypedValueRegion>(O.getRegion())) {
-    QualType baseT = baseR->getValueType();
-    if (baseT->isScalarType()) {
-      QualType elemT = R->getElementType();
-      if (elemT->isScalarType()) {
-        if (Ctx.getTypeSizeInChars(baseT) >= Ctx.getTypeSizeInChars(elemT)) {
-          if (const Optional<SVal> &V = B.getDirectBinding(superR)) {
-            if (SymbolRef parentSym = V->getAsSymbol())
-              return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
-
-            if (V->isUnknownOrUndef())
-              return *V;
-            // Other cases: give up.  We are indexing into a larger object
-            // that has some value, but we don't know how to handle that yet.
-            return UnknownVal();
-          }
-        }
-      }
-    }
-  }
+  if (const TypedValueRegion *baseR = dyn_cast<TypedValueRegion>(O.getRegion()))
+    if (auto V = getDerivedSymbolForBinding(B, baseR, R, Ctx, svalBuilder))
+      return *V;
+
   return getBindingForFieldOrElementCommon(B, R, R->getElementType());
 }
 
@@ -1988,6 +1995,26 @@ SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
         }
   }
 
+  // Handle the case where we are accessing into a larger scalar object.
+  // For example, this handles:
+  //   struct header {
+  //     unsigned a : 1;
+  //     unsigned b : 1;
+  //   };
+  //   struct parse_t {
+  //     unsigned bits0 : 1;
+  //     unsigned bits2 : 2; // <-- header
+  //     unsigned bits4 : 4;
+  //   };
+  //   int parse(parse_t *p) {
+  //     unsigned copy = p->bits2;
+  //     header *bits = (header *)©
+  //     return bits->b;  <-- here
+  //   }
+  if (const auto *Base = dyn_cast<TypedValueRegion>(R->getBaseRegion()))
+    if (auto V = getDerivedSymbolForBinding(B, Base, R, Ctx, svalBuilder))
+      return *V;
+
   return getBindingForFieldOrElementCommon(B, R, Ty);
 }
 

diff  --git a/clang/test/Analysis/ptr-arith.cpp b/clang/test/Analysis/ptr-arith.cpp
index 1eec83c643e1..a8d90fa63b29 100644
--- a/clang/test/Analysis/ptr-arith.cpp
+++ b/clang/test/Analysis/ptr-arith.cpp
@@ -1,4 +1,7 @@
 // RUN: %clang_analyze_cc1 -Wno-unused-value -std=c++14 -analyzer-checker=core,debug.ExprInspection,alpha.core.PointerArithm -verify %s
+
+template <typename T> void clang_analyzer_dump(T);
+
 struct X {
   int *p;
   int zero;
@@ -117,3 +120,24 @@ bool integerAsPtrSubtractionNoCrash(char *p, __UINTPTR_TYPE__ m) {
   auto n = p - reinterpret_cast<char*>((__UINTPTR_TYPE__)1);
   return n == m;
 }
+
+namespace Bug_55934 {
+struct header {
+  unsigned a : 1;
+  unsigned b : 1;
+};
+struct parse_t {
+  unsigned bits0 : 1;
+  unsigned bits2 : 2; // <-- header
+  unsigned bits4 : 4;
+};
+int parse(parse_t *p) {
+  unsigned copy = p->bits2;
+  clang_analyzer_dump(copy);
+  // expected-warning at -1 {{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>}}
+  header *bits = (header *)©
+  clang_analyzer_dump(bits->b);
+  // expected-warning at -1 {{derived_$2{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
+  return bits->b; // no-warning
+}
+} // namespace Bug_55934


        


More information about the cfe-commits mailing list