[clang] [analyzer] Check the correct first and last elements in cstring.UninitializedRead (PR #95408)

Kristóf Umann via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 20 01:30:00 PDT 2024


https://github.com/Szelethus updated https://github.com/llvm/llvm-project/pull/95408

>From d717b412749f10b45a9387044e97da6981f3cad4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Thu, 25 Apr 2024 17:31:24 +0200
Subject: [PATCH 1/4] [analyzer] Check the correct first and last elements in
 cstring.UninitializedRead

I intend to fix this checker up so that we can move it out of alpha. I
made a bunch of analyses, and found many similar false positives:

```c++
int t[] = {1,2,3};
memcpy(dst, t, sizeof(t) / sizeof(t[0])); // warn
```

The problem here is the way CStringChecker checks whether the
destination and source buffers are initialized: heuristically, it only
checks the first and last element. This is fine, however, it retrieves
these elements as characters, even if the underlaying object is not a
character array. Reading the last byte of an integer is undefined, so
the checker emits a bug here.

A quick search tells you the rationale: "Both objects are reinterpreted
as arrays of unsigned char.". But the static analyzer right now can't
check byte-by-byte if a memory region is _initialized_, it can only
check if its a well-defined character or not.

In this patch, I pry the original array out of the arguments to memcpy
(and similar functions), and retrieve the actual first and last elements
according to the array's actual element type.

Currently, my improvements reduced the number of reports to 29 on these
projects: memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres

https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_upper_bound_patched&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100

Before my patch, there were 87.

https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_baseline&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100

Change-Id: I0ef16beed735201af857abb6ed02de764af2e78a
---
 .../Core/PathSensitive/MemRegion.h            |  14 ++
 .../Checkers/CStringChecker.cpp               | 226 +++++++++++++++---
 clang/test/Analysis/bstring_UninitRead.c      |  66 ++++-
 3 files changed, 258 insertions(+), 48 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
index 151d3e57c1cb8..05d679b659029 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
@@ -34,6 +34,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstdint>
 #include <limits>
@@ -99,6 +100,8 @@ class MemRegion : public llvm::FoldingSetNode {
 #define REGION(Id, Parent) Id ## Kind,
 #define REGION_RANGE(Id, First, Last) BEGIN_##Id = First, END_##Id = Last,
 #include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+#undef REGION_RANGE
   };
 
 private:
@@ -171,6 +174,17 @@ class MemRegion : public llvm::FoldingSetNode {
 
   Kind getKind() const { return kind; }
 
+  StringRef getKindStr() const {
+    switch (getKind()) {
+#define REGION(Id, Parent)                                                     \
+  case Id##Kind:                                                               \
+    return #Id;
+#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+    }
+    llvm_unreachable("Unkown kind!");
+  }
+
   template<typename RegionTy> const RegionTy* getAs() const;
   template <typename RegionTy>
   LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy *castAs() const;
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 238e87a712a43..fdea2e01215b4 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InterCheckerAPI.h"
+#include "clang/AST/OperationKinds.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
@@ -22,10 +23,13 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/raw_ostream.h"
 #include <functional>
 #include <optional>
@@ -304,6 +308,10 @@ class CStringChecker : public Checker< eval::Call,
   // Re-usable checks
   ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
                                AnyArgExpr Arg, SVal l) const;
+  // Check whether the origin region behind \p Element (like the actual array
+  // region \p Element is from) is initialized.
+  ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
+                            AnyArgExpr Buffer, SVal Element, SVal Size) const;
   ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
                                 AnyArgExpr Buffer, SVal Element,
                                 AccessKind Access,
@@ -329,7 +337,7 @@ class CStringChecker : public Checker< eval::Call,
                          const Stmt *S, StringRef WarningMsg) const;
   void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
   void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
-                             const Expr *E) const;
+                                const Expr *E, StringRef Msg) const;
   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
                                             ProgramStateRef state,
                                             NonLoc left,
@@ -393,6 +401,173 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
   return stateNonNull;
 }
 
+static std::optional<NonLoc> getIndex(ProgramStateRef State,
+                                      const ElementRegion *ER, CharKind CK) {
+  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SValBuilder.getContext();
+
+  if (CK == CharKind::Regular) {
+    if (ER->getValueType() != Ctx.CharTy)
+      return {};
+    return ER->getIndex();
+  }
+
+  if (ER->getValueType() != Ctx.WideCharTy)
+    return {};
+
+  QualType SizeTy = Ctx.getSizeType();
+  NonLoc WideSize =
+      SValBuilder
+          .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
+                      SizeTy)
+          .castAs<NonLoc>();
+  SVal Offset =
+      SValBuilder.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
+  if (Offset.isUnknown())
+    return {};
+  return Offset.castAs<NonLoc>();
+}
+
+// Try to get hold of the origin regin (e.g. the actual array region from an
+// element region).
+static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
+  const MemRegion *MR = ER->getSuperRegion();
+  const MemRegion *Ret = MR;
+  assert(MR);
+  if (const auto *sym = MR->getAs<SymbolicRegion>()) {
+    SymbolRef sym2 = sym->getSymbol();
+    if (!sym2)
+      return nullptr;
+    Ret = sym2->getOriginRegion();
+  }
+  if (const auto *element = MR->getAs<ElementRegion>()) {
+    Ret = element->getBaseRegion();
+  }
+  return dyn_cast_or_null<TypedValueRegion>(Ret);
+}
+
+// Basically 1 -> 1st, 12 -> 12th, etc.
+static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
+  Os << Idx << llvm::getOrdinalSuffix(Idx);
+}
+
+ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
+                                          ProgramStateRef State,
+                                          AnyArgExpr Buffer, SVal Element,
+                                          SVal Size) const {
+
+  // If a previous check has failed, propagate the failure.
+  if (!State)
+    return nullptr;
+
+  const MemRegion *R = Element.getAsRegion();
+  if (!R)
+    return State;
+
+  const auto *ER = dyn_cast<ElementRegion>(R);
+  if (!ER)
+    return State;
+
+  const TypedValueRegion *Orig = getOriginRegion(ER);
+  if (!Orig)
+    return State;
+
+  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SValBuilder.getContext();
+
+  // FIXME: We ought to able to check objects as well. Maybe
+  // UninitializedObjectChecker could help?
+  if (!Orig->getValueType()->isArrayType())
+    return State;
+
+  const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType());
+  const NonLoc Zero = SValBuilder.makeZeroArrayIndex();
+
+  SVal FirstElementVal =
+      State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>();
+  if (!isa<Loc>(FirstElementVal))
+    return State;
+
+  // Ensure that we wouldn't read uninitialized value.
+  if (Filter.CheckCStringUninitializedRead &&
+      State->getSVal(FirstElementVal.castAs<Loc>()).isUndef()) {
+    llvm::SmallString<258> Buf;
+    llvm::raw_svector_ostream OS(Buf);
+    OS << "The first element of the ";
+    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+    OS << " argument is undefined";
+    emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+    return nullptr;
+  }
+
+  // We won't check whether the entire region is fully initialized -- lets just
+  // check that the first and the last element is. So, onto checking the last
+  // element:
+  const QualType IdxTy = SValBuilder.getArrayIndexType();
+
+  NonLoc ElemSize =
+      SValBuilder
+          .makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
+          .castAs<NonLoc>();
+
+  // FIXME: Check that the size arg to the cstring function is divisible by
+  // size of the actual element type?
+
+  // The type of the argument to the cstring function is either char or wchar,
+  // but thats not the type of the original array (or memory region).
+  // Suppose the following:
+  //   int t[5];
+  //   memcpy(dst, t, sizeof(t) / sizeof(t[0]));
+  // When checking whether t is fully initialized, we see it as char array of
+  // size sizeof(int)*5. If we check the last element as a character, we read
+  // the last byte of an integer, which will be undefined. But just because
+  // that value is undefined, it doesn't mean that the element is uninitialized!
+  // For this reason, we need to retrieve the actual last element with the
+  // correct type.
+
+  // Divide the size argument to the cstring function by the actual element
+  // type. This value will be size of the array, or the index to the
+  // past-the-end element.
+  std::optional<NonLoc> Offset =
+      SValBuilder
+          .evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
+                       IdxTy)
+          .getAs<NonLoc>();
+
+  // Retrieve the index of the last element.
+  const NonLoc One = SValBuilder.makeIntVal(1, IdxTy).castAs<NonLoc>();
+  SVal LastIdx = SValBuilder.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
+
+  if (!Offset)
+    return State;
+
+  SVal LastElementVal =
+      State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(Orig));
+  if (!isa<Loc>(LastElementVal))
+    return State;
+
+  if (Filter.CheckCStringUninitializedRead &&
+      State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
+    const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
+    // If we can't get emit a sensible last element index, just bail out --
+    // prefer to emit nothing in favour of emitting garbage quality reports.
+    if (!IdxInt) {
+      C.addSink();
+      return nullptr;
+    }
+    llvm::SmallString<258> Buf;
+    llvm::raw_svector_ostream OS(Buf);
+    OS << "The last element of the ";
+    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+    OS << " argument to access (the ";
+    printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1);
+    OS << ") is undefined";
+    emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+    return nullptr;
+  }
+  return State;
+}
+
 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
                                               ProgramStateRef state,
@@ -413,30 +588,10 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
   if (!ER)
     return state;
 
-  SValBuilder &svalBuilder = C.getSValBuilder();
-  ASTContext &Ctx = svalBuilder.getContext();
-
   // Get the index of the accessed element.
-  NonLoc Idx = ER->getIndex();
-
-  if (CK == CharKind::Regular) {
-    if (ER->getValueType() != Ctx.CharTy)
-      return state;
-  } else {
-    if (ER->getValueType() != Ctx.WideCharTy)
-      return state;
-
-    QualType SizeTy = Ctx.getSizeType();
-    NonLoc WideSize =
-        svalBuilder
-            .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
-                        SizeTy)
-            .castAs<NonLoc>();
-    SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
-    if (Offset.isUnknown())
-      return state;
-    Idx = Offset.castAs<NonLoc>();
-  }
+  std::optional<NonLoc> Idx = getIndex(state, ER, CK);
+  if (!Idx)
+    return state;
 
   // Get the size of the array.
   const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
@@ -444,7 +599,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
       getDynamicExtent(state, superReg, C.getSValBuilder());
 
   ProgramStateRef StInBound, StOutBound;
-  std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
+  std::tie(StInBound, StOutBound) = state->assumeInBoundDual(*Idx, Size);
   if (StOutBound && !StInBound) {
     // These checks are either enabled by the CString out-of-bounds checker
     // explicitly or implicitly by the Malloc checker.
@@ -459,15 +614,6 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
     return nullptr;
   }
 
-  // Ensure that we wouldn't read uninitialized value.
-  if (Access == AccessKind::read) {
-    if (Filter.CheckCStringUninitializedRead &&
-        StInBound->getSVal(ER).isUndef()) {
-      emitUninitializedReadBug(C, StInBound, Buffer.Expression);
-      return nullptr;
-    }
-  }
-
   // Array bound check succeeded.  From this point forward the array bound
   // should always succeed.
   return StInBound;
@@ -502,6 +648,7 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
 
   // Check if the first byte of the buffer is accessible.
   State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
+
   if (!State)
     return nullptr;
 
@@ -526,6 +673,8 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
     SVal BufEnd =
         svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
     State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
+    if (Access == AccessKind::read)
+      State = checkInit(C, State, Buffer, BufEnd, *Length);
 
     // If the buffer isn't large enough, abort.
     if (!State)
@@ -694,16 +843,17 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
 
 void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
                                               ProgramStateRef State,
-                                              const Expr *E) const {
+                                              const Expr *E,
+                                              StringRef Msg) const {
   if (ExplodedNode *N = C.generateErrorNode(State)) {
-    const char *Msg =
-        "Bytes string function accesses uninitialized/garbage values";
     if (!BT_UninitRead)
       BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
                                       "Accessing unitialized/garbage values"));
 
     auto Report =
         std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
+    Report->addNote("Other elements might also be undefined",
+                    Report->getLocation());
     Report->addRange(E->getSourceRange());
     bugreporter::trackExpressionValue(N, E, *Report);
     C.emitReport(std::move(Report));
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index c535e018e62c2..97625a3650663 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -16,7 +16,31 @@ void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
 
 void top(char *dst) {
   char buf[10];
-  memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
+  memcpy(dst, buf, 10); // expected-warning{{The first element of the 2nd argument is undefined}}
+                        // expected-note at -1{{Other elements might also be undefined}}
+  (void)buf;
+}
+
+void top2(char *dst) {
+  char buf[10];
+  buf[0] = 'i';
+  memcpy(dst, buf, 10); // expected-warning{{The last element of the 2nd argument to access (the 10th) is undefined}}
+                        // expected-note at -1{{Other elements might also be undefined}}
+  (void)buf;
+}
+
+void top3(char *dst) {
+  char buf[10];
+  buf[0] = 'i';
+  memcpy(dst, buf, 1);
+  (void)buf;
+}
+
+void top4(char *dst) {
+  char buf[10];
+  buf[0] = 'i';
+  memcpy(dst, buf, 2); // expected-warning{{The last element of the 2nd argument to access (the 2nd) is undefined}}
+                        // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
 
@@ -26,16 +50,12 @@ void top(char *dst) {
 
 void *mempcpy(void *restrict s1, const void *restrict s2, size_t n);
 
-void mempcpy14() {
+void mempcpy13() {
   int src[] = {1, 2, 3, 4};
   int dst[5] = {0};
   int *p;
 
-  p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
-   // FIXME: This behaviour is actually surprising and needs to be fixed, 
-   // mempcpy seems to consider the very last byte of the src buffer uninitialized
-   // and returning undef unfortunately. It should have returned unknown or a conjured value instead.
-
+  p = mempcpy(dst, src, 4 * sizeof(int));
   clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal)
 }
 
@@ -52,8 +72,34 @@ void mempcpy15() {
   struct st *p2;
 
   p1 = (&s2) + 1;
-  p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
-  // FIXME: It seems same as mempcpy14() case.
-  
+
+  p2 = mempcpy(&s2, &s1, sizeof(struct st));
+
   clang_analyzer_eval(p1 == p2); // no-warning (above is fatal)
 }
+
+void mempcpy16() {
+  struct st s1;
+  struct st s2;
+
+  // FIXME: Maybe ask UninitializedObjectChecker whether s1 is fully
+  // initialized?
+  mempcpy(&s2, &s1, sizeof(struct st));
+}
+
+void initialized(int *dest) {
+  int t[] = {1, 2, 3};
+  memcpy(dest, t, sizeof(t));
+}
+
+// Creduced crash.
+
+void *ga_copy_strings_from_0;
+void *memmove();
+void alloc();
+void ga_copy_strings() {
+  int i = 0;
+  for (;; ++i)
+    memmove(alloc, ((char **)ga_copy_strings_from_0)[i], 1);
+}
+

>From 5c21678692f33bf3b11d28fd5f5a771482ba22ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Tue, 18 Jun 2024 13:30:03 +0200
Subject: [PATCH 2/4] Partial revisions according to reviewer comments

---
 .../Core/PathSensitive/MemRegion.h            | 11 +---
 .../Checkers/CStringChecker.cpp               | 52 ++++++++-----------
 clang/lib/StaticAnalyzer/Core/MemRegion.cpp   | 11 ++++
 clang/test/Analysis/bstring_UninitRead.c      | 52 ++++++++++---------
 4 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
index 05d679b659029..b5cdacd4eef46 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
@@ -174,16 +174,7 @@ class MemRegion : public llvm::FoldingSetNode {
 
   Kind getKind() const { return kind; }
 
-  StringRef getKindStr() const {
-    switch (getKind()) {
-#define REGION(Id, Parent)                                                     \
-  case Id##Kind:                                                               \
-    return #Id;
-#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
-#undef REGION
-    }
-    llvm_unreachable("Unkown kind!");
-  }
+  StringRef getKindStr() const;
 
   template<typename RegionTy> const RegionTy* getAs() const;
   template <typename RegionTy>
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index fdea2e01215b4..eaa999ff70c5f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -360,15 +360,15 @@ REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
 //===----------------------------------------------------------------------===//
 
 std::pair<ProgramStateRef , ProgramStateRef >
-CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
+CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
                            QualType Ty) {
   std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
   if (!val)
-    return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
+    return std::pair<ProgramStateRef , ProgramStateRef >(State, State);
 
   SValBuilder &svalBuilder = C.getSValBuilder();
   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
-  return state->assume(svalBuilder.evalEQ(state, *val, zero));
+  return State->assume(svalBuilder.evalEQ(State, *val, zero));
 }
 
 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
@@ -403,8 +403,8 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
 
 static std::optional<NonLoc> getIndex(ProgramStateRef State,
                                       const ElementRegion *ER, CharKind CK) {
-  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
-  ASTContext &Ctx = SValBuilder.getContext();
+  SValBuilder &SVB = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SVB.getContext();
 
   if (CK == CharKind::Regular) {
     if (ER->getValueType() != Ctx.CharTy)
@@ -417,18 +417,17 @@ static std::optional<NonLoc> getIndex(ProgramStateRef State,
 
   QualType SizeTy = Ctx.getSizeType();
   NonLoc WideSize =
-      SValBuilder
-          .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
-                      SizeTy)
+      SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
+                     SizeTy)
           .castAs<NonLoc>();
   SVal Offset =
-      SValBuilder.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
+      SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
   if (Offset.isUnknown())
     return {};
   return Offset.castAs<NonLoc>();
 }
 
-// Try to get hold of the origin regin (e.g. the actual array region from an
+// Try to get hold of the origin region (e.g. the actual array region from an
 // element region).
 static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
   const MemRegion *MR = ER->getSuperRegion();
@@ -440,8 +439,8 @@ static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
       return nullptr;
     Ret = sym2->getOriginRegion();
   }
-  if (const auto *element = MR->getAs<ElementRegion>()) {
-    Ret = element->getBaseRegion();
+  if (const auto *Elem = MR->getAs<ElementRegion>()) {
+    Ret = Elem->getBaseRegion();
   }
   return dyn_cast_or_null<TypedValueRegion>(Ret);
 }
@@ -472,21 +471,19 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   if (!Orig)
     return State;
 
-  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
-  ASTContext &Ctx = SValBuilder.getContext();
-
   // FIXME: We ought to able to check objects as well. Maybe
   // UninitializedObjectChecker could help?
   if (!Orig->getValueType()->isArrayType())
     return State;
 
+  SValBuilder &SVB = C.getSValBuilder();
+  ASTContext &Ctx = SVB.getContext();
+
   const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType());
-  const NonLoc Zero = SValBuilder.makeZeroArrayIndex();
+  const NonLoc Zero = SVB.makeZeroArrayIndex();
 
-  SVal FirstElementVal =
+  Loc FirstElementVal =
       State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>();
-  if (!isa<Loc>(FirstElementVal))
-    return State;
 
   // Ensure that we wouldn't read uninitialized value.
   if (Filter.CheckCStringUninitializedRead &&
@@ -503,11 +500,10 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   // We won't check whether the entire region is fully initialized -- lets just
   // check that the first and the last element is. So, onto checking the last
   // element:
-  const QualType IdxTy = SValBuilder.getArrayIndexType();
+  const QualType IdxTy = SVB.getArrayIndexType();
 
   NonLoc ElemSize =
-      SValBuilder
-          .makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
+      SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
           .castAs<NonLoc>();
 
   // FIXME: Check that the size arg to the cstring function is divisible by
@@ -529,14 +525,13 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   // type. This value will be size of the array, or the index to the
   // past-the-end element.
   std::optional<NonLoc> Offset =
-      SValBuilder
-          .evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
-                       IdxTy)
+      SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
+                      IdxTy)
           .getAs<NonLoc>();
 
   // Retrieve the index of the last element.
-  const NonLoc One = SValBuilder.makeIntVal(1, IdxTy).castAs<NonLoc>();
-  SVal LastIdx = SValBuilder.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
+  const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
+  SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
 
   if (!Offset)
     return State;
@@ -598,8 +593,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
   DefinedOrUnknownSVal Size =
       getDynamicExtent(state, superReg, C.getSValBuilder());
 
-  ProgramStateRef StInBound, StOutBound;
-  std::tie(StInBound, StOutBound) = state->assumeInBoundDual(*Idx, Size);
+  auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
   if (StOutBound && !StInBound) {
     // These checks are either enabled by the CString out-of-bounds checker
     // explicitly or implicitly by the Malloc checker.
diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
index d6e4f23cc353f..9b65581b91fee 100644
--- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -630,6 +630,17 @@ bool MemRegion::canPrintPrettyAsExpr() const {
   return false;
 }
 
+StringRef MemRegion::getKindStr() const {
+  switch (getKind()) {
+#define REGION(Id, Parent)                                                     \
+  case Id##Kind:                                                               \
+    return #Id;
+#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+  }
+  llvm_unreachable("Unkown kind!");
+}
+
 void MemRegion::printPretty(raw_ostream &os) const {
   assert(canPrintPretty() && "This region cannot be printed pretty.");
   os << "'";
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index 97625a3650663..945975bfaaf69 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -1,12 +1,11 @@
 // RUN: %clang_analyze_cc1 -verify %s \
 // RUN: -analyzer-checker=core,alpha.unix.cstring
 
-
-// This file is generally for the alpha.unix.cstring.UninitializedRead Checker, the reason for putting it into
-// the separate file because the checker is break the some existing test cases in bstring.c file , so we don't 
-// wanna mess up with some existing test case so it's better to create separate file for it, this file also include 
-// the broken test for the reference in future about the broken tests.
-
+//===----------------------------------------------------------------------===//
+// mempcpy() using character array. This is the easiest case, as memcpy
+// intepretrs the dst and src buffers as character arrays (regardless of their
+// actual type).
+//===----------------------------------------------------------------------===//
 
 typedef typeof(sizeof(int)) size_t;
 
@@ -14,14 +13,14 @@ void clang_analyzer_eval(int);
 
 void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
 
-void top(char *dst) {
+void memcpy_array_fully_uninit(char *dst) {
   char buf[10];
   memcpy(dst, buf, 10); // expected-warning{{The first element of the 2nd argument is undefined}}
                         // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
 
-void top2(char *dst) {
+void memcpy_array_partially_uninit(char *dst) {
   char buf[10];
   buf[0] = 'i';
   memcpy(dst, buf, 10); // expected-warning{{The last element of the 2nd argument to access (the 10th) is undefined}}
@@ -29,14 +28,14 @@ void top2(char *dst) {
   (void)buf;
 }
 
-void top3(char *dst) {
+void memcpy_array_only_init_portion(char *dst) {
   char buf[10];
   buf[0] = 'i';
   memcpy(dst, buf, 1);
   (void)buf;
 }
 
-void top4(char *dst) {
+void memcpy_array_partially_init_error(char *dst) {
   char buf[10];
   buf[0] = 'i';
   memcpy(dst, buf, 2); // expected-warning{{The last element of the 2nd argument to access (the 2nd) is undefined}}
@@ -44,28 +43,36 @@ void top4(char *dst) {
   (void)buf;
 }
 
-//===----------------------------------------------------------------------===
-// mempcpy()
-//===----------------------------------------------------------------------===
+//===----------------------------------------------------------------------===//
+// mempcpy() using non-character arrays.
+//===----------------------------------------------------------------------===//
 
 void *mempcpy(void *restrict s1, const void *restrict s2, size_t n);
 
-void mempcpy13() {
+void memcpy_int_array_fully_init() {
   int src[] = {1, 2, 3, 4};
   int dst[5] = {0};
   int *p;
 
   p = mempcpy(dst, src, 4 * sizeof(int));
-  clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal)
+  clang_analyzer_eval(p == &dst[4]);
 }
 
+void memcpy_int_array_fully_init2(int *dest) {
+  int t[] = {1, 2, 3};
+  memcpy(dest, t, sizeof(t));
+}
+
+//===----------------------------------------------------------------------===//
+// mempcpy() using nonarrays.
+//===----------------------------------------------------------------------===//
+
 struct st {
   int i;
   int j;
 };
 
-
-void mempcpy15() {
+void mempcpy_struct_partially_uninit() {
   struct st s1 = {0};
   struct st s2;
   struct st *p1;
@@ -73,12 +80,14 @@ void mempcpy15() {
 
   p1 = (&s2) + 1;
 
+  // FIXME: Maybe ask UninitializedObjectChecker whether s1 is fully
+  // initialized?
   p2 = mempcpy(&s2, &s1, sizeof(struct st));
 
-  clang_analyzer_eval(p1 == p2); // no-warning (above is fatal)
+  clang_analyzer_eval(p1 == p2);
 }
 
-void mempcpy16() {
+void mempcpy_struct_fully_uninit() {
   struct st s1;
   struct st s2;
 
@@ -87,11 +96,6 @@ void mempcpy16() {
   mempcpy(&s2, &s1, sizeof(struct st));
 }
 
-void initialized(int *dest) {
-  int t[] = {1, 2, 3};
-  memcpy(dest, t, sizeof(t));
-}
-
 // Creduced crash.
 
 void *ga_copy_strings_from_0;

>From ba19d73f7e463b7aef821ec20a4b60e1390fbc57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Tue, 18 Jun 2024 17:06:18 +0200
Subject: [PATCH 3/4] Address the remaining comments

---
 .../Checkers/CStringChecker.cpp               | 16 ++++--------
 clang/test/Analysis/bstring_UninitRead.c      | 25 +++++++++++++++----
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index eaa999ff70c5f..bc31c268fca9d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -439,9 +439,6 @@ static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
       return nullptr;
     Ret = sym2->getOriginRegion();
   }
-  if (const auto *Elem = MR->getAs<ElementRegion>()) {
-    Ret = Elem->getBaseRegion();
-  }
   return dyn_cast_or_null<TypedValueRegion>(Ret);
 }
 
@@ -460,10 +457,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
     return nullptr;
 
   const MemRegion *R = Element.getAsRegion();
-  if (!R)
-    return State;
-
-  const auto *ER = dyn_cast<ElementRegion>(R);
+  const auto *ER = dyn_cast_or_null<ElementRegion>(R);
   if (!ER)
     return State;
 
@@ -552,11 +546,11 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
     }
     llvm::SmallString<258> Buf;
     llvm::raw_svector_ostream OS(Buf);
-    OS << "The last element of the ";
-    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
-    OS << " argument to access (the ";
+    OS << "The last (";
     printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1);
-    OS << ") is undefined";
+    OS << ") element to access in the ";
+    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+    OS << " argument is undefined";
     emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
     return nullptr;
   }
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index 945975bfaaf69..7937146d6364e 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -23,7 +23,7 @@ void memcpy_array_fully_uninit(char *dst) {
 void memcpy_array_partially_uninit(char *dst) {
   char buf[10];
   buf[0] = 'i';
-  memcpy(dst, buf, 10); // expected-warning{{The last element of the 2nd argument to access (the 10th) is undefined}}
+  memcpy(dst, buf, 10); // expected-warning{{The last (10th) element to access in the 2nd argument is undefined}}
                         // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
@@ -38,8 +38,23 @@ void memcpy_array_only_init_portion(char *dst) {
 void memcpy_array_partially_init_error(char *dst) {
   char buf[10];
   buf[0] = 'i';
-  memcpy(dst, buf, 2); // expected-warning{{The last element of the 2nd argument to access (the 2nd) is undefined}}
-                        // expected-note at -1{{Other elements might also be undefined}}
+  memcpy(dst, buf, 2); // expected-warning{{The last (2nd) element to access in the 2nd argument is undefined}}
+                      // expected-note at -1{{Other elements might also be undefined}}
+  (void)buf;
+}
+
+// The interesting case here is that the portion we're copying is initialized,
+// but not the whole matrix. We need to be careful to extract buf[1], and not
+// buf when trying to peel region layers off from the source argument.
+void memcpy_array_from_matrix(char *dst) {
+  char buf[2][2];
+  buf[1][0] = 'i';
+  buf[1][1] = 'j';
+  // FIXME: This is a FP -- we mistakenly retrieve the first element of buf,
+  // instead of the first element of buf[1]. getLValueElement simply peels off
+  // another ElementRegion layer, when in this case it really shouldn't.
+  memcpy(dst, buf[1], 2); // expected-warning{{The first element of the 2nd argument is undefined}}
+                          // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
 
@@ -96,8 +111,8 @@ void mempcpy_struct_fully_uninit() {
   mempcpy(&s2, &s1, sizeof(struct st));
 }
 
-// Creduced crash.
-
+// Creduced crash. In this case, an symbolicregion is wrapped in an
+// elementregion for the src argument.
 void *ga_copy_strings_from_0;
 void *memmove();
 void alloc();

>From 28a4371170799a124d51a753c1f48632bb8ce857 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Thu, 20 Jun 2024 10:29:46 +0200
Subject: [PATCH 4/4] Change getOriginRegion to getSuperRegion

---
 .../StaticAnalyzer/Checkers/CStringChecker.cpp  | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index bc31c268fca9d..f7e04de269dd5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -427,21 +427,6 @@ static std::optional<NonLoc> getIndex(ProgramStateRef State,
   return Offset.castAs<NonLoc>();
 }
 
-// Try to get hold of the origin region (e.g. the actual array region from an
-// element region).
-static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
-  const MemRegion *MR = ER->getSuperRegion();
-  const MemRegion *Ret = MR;
-  assert(MR);
-  if (const auto *sym = MR->getAs<SymbolicRegion>()) {
-    SymbolRef sym2 = sym->getSymbol();
-    if (!sym2)
-      return nullptr;
-    Ret = sym2->getOriginRegion();
-  }
-  return dyn_cast_or_null<TypedValueRegion>(Ret);
-}
-
 // Basically 1 -> 1st, 12 -> 12th, etc.
 static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
   Os << Idx << llvm::getOrdinalSuffix(Idx);
@@ -461,7 +446,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   if (!ER)
     return State;
 
-  const TypedValueRegion *Orig = getOriginRegion(ER);
+  const auto *Orig = ER->getSuperRegion()->getAs<TypedValueRegion>();
   if (!Orig)
     return State;
 



More information about the cfe-commits mailing list